This reduces the clutter in gtk/.
+++ /dev/null
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
-#
-# compose-parse.py, version 1.4
-#
-# multifunction script that helps manage the compose sequence table in GTK+ (gtk/gtkimcontextsimple.c)
-# the script produces statistics and information about the whole process, run with --help for more.
-#
-# You may need to switch your python installation to utf-8, if you get 'ascii' codec errors.
-#
-# Complain to Simos Xenitellis (simos@gnome.org, http://simos.info/blog) for this craft.
-
-from re import findall, match, split, sub
-from string import atoi
-from unicodedata import normalize
-from urllib import urlretrieve
-from os.path import isfile, getsize
-from copy import copy
-
-import sys
-import getopt
-
-# We grab files off the web, left and right.
-URL_COMPOSE = 'http://cgit.freedesktop.org/xorg/lib/libX11/plain/nls/en_US.UTF-8/Compose.pre'
-URL_KEYSYMSTXT = "http://www.cl.cam.ac.uk/~mgk25/ucs/keysyms.txt"
-URL_GDKKEYSYMSH = "http://git.gnome.org/browse/gtk%2B/plain/gdk/gdkkeysyms.h"
-URL_UNICODEDATATXT = 'http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt'
-FILENAME_COMPOSE_SUPPLEMENTARY = 'gtk-compose-lookaside.txt'
-FILENAME_COMPOSE_NEGATIVE_SUPPLEMENTARY = 'gtk-compose-remove.txt'
-
-# We currently support keysyms of size 2; once upstream xorg gets sorted,
-# we might produce some tables with size 2 and some with size 4.
-SIZEOFINT = 2
-
-# Current max compose sequence length; in case it gets increased.
-WIDTHOFCOMPOSETABLE = 5
-
-keysymdatabase = {}
-keysymunicodedatabase = {}
-unicodedatabase = {}
-
-headerfile_start = """/* GTK - The GIMP Tool Kit
- * Copyright (C) 2007, 2008 GNOME Foundation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- */
-
-/*
- * File auto-generated from script found at http://bugzilla.gnome.org/show_bug.cgi?id=321896
- * using the input files
- * Input : http://cgit.freedesktop.org/xorg/lib/libX11/plain/nls/en_US.UTF-8/Compose.pre
- * Input : http://www.cl.cam.ac.uk/~mgk25/ucs/keysyms.txt
- * Input : http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
- *
- * This table is optimised for space and requires special handling to access the content.
- * This table is used solely by http://svn.gnome.org/viewcvs/gtk%2B/trunk/gtk/gtkimcontextsimple.c
- *
- * The resulting file is placed at http://svn.gnome.org/viewcvs/gtk%2B/trunk/gtk/gtkimcontextsimpleseqs.h
- * This file is described in bug report http://bugzilla.gnome.org/show_bug.cgi?id=321896
- */
-
-/*
- * Modified by the GTK+ Team and others 2007, 2008. See the AUTHORS
- * file for a list of people on the GTK+ Team. See the ChangeLog
- * files for a list of changes. These files are distributed with
- * GTK+ at ftp://ftp.gtk.org/pub/gtk/.
- */
-
-#ifndef __GTK_IM_CONTEXT_SIMPLE_SEQS_H__
-#define __GTK_IM_CONTEXT_SIMPLE_SEQS_H__
-
-/* === These are the original comments of the file; we keep for historical purposes ===
- *
- * The following table was generated from the X compose tables include with
- * XFree86 4.0 using a set of Perl scripts. Contact Owen Taylor <otaylor@redhat.com>
- * to obtain the relevant perl scripts.
- *
- * The following compose letter letter sequences conflicted
- * Dstroke/dstroke and ETH/eth; resolved to Dstroke (Croatian, Vietnamese, Lappish), over
- * ETH (Icelandic, Faroese, old English, IPA) [ D- -D d- -d ]
- * Amacron/amacron and ordfeminine; resolved to ordfeminine [ _A A_ a_ _a ]
- * Amacron/amacron and Atilde/atilde; resolved to atilde [ -A A- a- -a ]
- * Omacron/Omacron and masculine; resolved to masculine [ _O O_ o_ _o ]
- * Omacron/omacron and Otilde/atilde; resolved to otilde [ -O O- o- -o ]
- *
- * [ Amacron and Omacron are in Latin-4 (Baltic). ordfeminine and masculine are used for
- * spanish. atilde and otilde are used at least for Portuguese ]
- *
- * at and Aring; resolved to Aring [ AA ]
- * guillemotleft and caron; resolved to guillemotleft [ << ]
- * ogonek and cedilla; resolved to cedilla [ ,, ]
- *
- * This probably should be resolved by first checking an additional set of compose tables
- * that depend on the locale or selected input method.
- */
-
-static const guint16 gtk_compose_seqs_compact[] = {"""
-
-headerfile_end = """};
-
-#endif /* __GTK_IM_CONTEXT_SIMPLE_SEQS_H__ */
-"""
-
-def stringtohex(str): return atoi(str, 16)
-
-def factorial(n):
- if n <= 1:
- return 1
- else:
- return n * factorial(n-1)
-
-def uniq(*args) :
- """ Performs a uniq operation on a list or lists """
- theInputList = []
- for theList in args:
- theInputList += theList
- theFinalList = []
- for elem in theInputList:
- if elem not in theFinalList:
- theFinalList.append(elem)
- return theFinalList
-
-
-
-def all_permutations(seq):
- """ Borrowed from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/252178 """
- """ Produces all permutations of the items of a list """
- if len(seq) <=1:
- yield seq
- else:
- for perm in all_permutations(seq[1:]):
- for i in range(len(perm)+1):
- #nb str[0:1] works in both string and list contexts
- yield perm[:i] + seq[0:1] + perm[i:]
-
-def usage():
- print """compose-parse available parameters:
- -h, --help this craft
- -s, --statistics show overall statistics (both algorithmic, non-algorithmic)
- -a, --algorithmic show sequences saved with algorithmic optimisation
- -g, --gtk show entries that go to GTK+
- -u, --unicodedatatxt show compose sequences derived from UnicodeData.txt (from unicode.org)
- -v, --verbose show verbose output
- -p, --plane1 show plane1 compose sequences
- -n, --numeric when used with --gtk, create file with numeric values only
- -e, --gtk-expanded when used with --gtk, create file that repeats first column; not usable in GTK+
-
- Default is to show statistics.
- """
-
-try:
- opts, args = getopt.getopt(sys.argv[1:], "pvgashune", ["help", "algorithmic", "statistics", "unicodedatatxt",
- "stats", "gtk", "verbose", "plane1", "numeric", "gtk-expanded"])
-except:
- usage()
- sys.exit(2)
-
-opt_statistics = False
-opt_algorithmic = False
-opt_gtk = False
-opt_unicodedatatxt = False
-opt_verbose = False
-opt_plane1 = False
-opt_numeric = False
-opt_gtkexpanded = False
-
-for o, a in opts:
- if o in ("-h", "--help"):
- usage()
- sys.exit()
- if o in ("-s", "--statistics"):
- opt_statistics = True
- if o in ("-a", "--algorithmic"):
- opt_algorithmic = True
- if o in ("-g", "--gtk"):
- opt_gtk = True
- if o in ("-u", "--unicodedatatxt"):
- opt_unicodedatatxt = True
- if o in ("-v", "--verbose"):
- opt_verbose = True
- if o in ("-p", "--plane1"):
- opt_plane1 = True
- if o in ("-n", "--numeric"):
- opt_numeric = True
- if o in ("-e", "--gtk-expanded"):
- opt_gtkexpanded = True
-
-if not opt_algorithmic and not opt_gtk and not opt_unicodedatatxt:
- opt_statistics = True
-
-def download_hook(blocks_transferred, block_size, file_size):
- """ A download hook to provide some feedback when downloading """
- if blocks_transferred == 0:
- if file_size > 0:
- if opt_verbose:
- print "Downloading", file_size, "bytes: ",
- else:
- if opt_verbose:
- print "Downloading: ",
- sys.stdout.write('#')
- sys.stdout.flush()
-
-
-def download_file(url):
- """ Downloads a file provided a URL. Returns the filename. """
- """ Borks on failure """
- localfilename = url.split('/')[-1]
- if not isfile(localfilename) or getsize(localfilename) <= 0:
- if opt_verbose:
- print "Downloading ", url, "..."
- try:
- urlretrieve(url, localfilename, download_hook)
- except IOError, (errno, strerror):
- print "I/O error(%s): %s" % (errno, strerror)
- sys.exit(-1)
- except:
- print "Unexpected error: ", sys.exc_info()[0]
- sys.exit(-1)
- print " done."
- else:
- if opt_verbose:
- print "Using cached file for ", url
- return localfilename
-
-def process_gdkkeysymsh():
- """ Opens the gdkkeysyms.h file from GTK+/gdk/gdkkeysyms.h """
- """ Fills up keysymdb with contents """
- filename_gdkkeysymsh = download_file(URL_GDKKEYSYMSH)
- try:
- gdkkeysymsh = open(filename_gdkkeysymsh, 'r')
- except IOError, (errno, strerror):
- print "I/O error(%s): %s" % (errno, strerror)
- sys.exit(-1)
- except:
- print "Unexpected error: ", sys.exc_info()[0]
- sys.exit(-1)
-
- """ Parse the gdkkeysyms.h file and place contents in keysymdb """
- linenum_gdkkeysymsh = 0
- keysymdb = {}
- for line in gdkkeysymsh.readlines():
- linenum_gdkkeysymsh += 1
- line = line.strip()
- if line == "" or not match('^#define GDK_KEY_', line):
- continue
- components = split('\s+', line)
- if len(components) < 3:
- print "Invalid line %(linenum)d in %(filename)s: %(line)s"\
- % {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line}
- print "Was expecting 3 items in the line"
- sys.exit(-1)
- if not match('^GDK_KEY_', components[1]):
- print "Invalid line %(linenum)d in %(filename)s: %(line)s"\
- % {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line}
- print "Was expecting a keysym starting with GDK_KEY_"
- sys.exit(-1)
- if match('^0x[0-9a-fA-F]+$', components[2]):
- unival = long(components[2][2:], 16)
- if unival == 0:
- continue
- keysymdb[components[1][8:]] = unival
- else:
- print "Invalid line %(linenum)d in %(filename)s: %(line)s"\
- % {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line}
- print "Was expecting a hexadecimal number at the end of the line"
- sys.exit(-1)
- gdkkeysymsh.close()
-
- """ Patch up the keysymdb with some of our own stuff """
-
- """ This is for a missing keysym from the currently upstream file """
- ###keysymdb['dead_stroke'] = 0x338
-
- """ This is for a missing keysym from the currently upstream file """
- ###keysymdb['dead_belowring'] = 0x323
- ###keysymdb['dead_belowmacron'] = 0x331
- ###keysymdb['dead_belowcircumflex'] = 0x32d
- ###keysymdb['dead_belowtilde'] = 0x330
- ###keysymdb['dead_belowbreve'] = 0x32e
- ###keysymdb['dead_belowdiaeresis'] = 0x324
-
- """ This is^Wwas preferential treatment for Greek """
- # keysymdb['dead_tilde'] = 0x342
- """ This is^was preferential treatment for Greek """
- #keysymdb['combining_tilde'] = 0x342
-
- """ Fixing VoidSymbol """
- keysymdb['VoidSymbol'] = 0xFFFF
-
- return keysymdb
-
-def process_keysymstxt():
- """ Grabs and opens the keysyms.txt file that Markus Kuhn maintains """
- """ This file keeps a record between keysyms <-> unicode chars """
- filename_keysymstxt = download_file(URL_KEYSYMSTXT)
- try:
- keysymstxt = open(filename_keysymstxt, 'r')
- except IOError, (errno, strerror):
- print "I/O error(%s): %s" % (errno, strerror)
- sys.exit(-1)
- except:
- print "Unexpected error: ", sys.exc_info()[0]
- sys.exit(-1)
-
- """ Parse the keysyms.txt file and place content in keysymdb """
- linenum_keysymstxt = 0
- keysymdb = {}
- for line in keysymstxt.readlines():
- linenum_keysymstxt += 1
- line = line.strip()
- if line == "" or match('^#', line):
- continue
- components = split('\s+', line)
- if len(components) < 5:
- print "Invalid line %(linenum)d in %(filename)s: %(line)s'"\
- % {'linenum': linenum_keysymstxt, 'filename': filename_keysymstxt, 'line': line}
- print "Was expecting 5 items in the line"
- sys.exit(-1)
- if match('^U[0-9a-fA-F]+$', components[1]):
- unival = long(components[1][1:], 16)
- if unival == 0:
- continue
- keysymdb[components[4]] = unival
- keysymstxt.close()
-
- """ Patch up the keysymdb with some of our own stuff """
- """ This is for a missing keysym from the currently upstream file """
- keysymdb['dead_belowring'] = 0x323
- keysymdb['dead_belowmacron'] = 0x331
- keysymdb['dead_belowcircumflex'] = 0x32d
- keysymdb['dead_belowtilde'] = 0x330
- keysymdb['dead_belowbreve'] = 0x32e
- keysymdb['dead_belowdiaeresis'] = 0x324
-
- """ This is preferential treatment for Greek """
- """ => we get more savings if used for Greek """
- # keysymdb['dead_tilde'] = 0x342
- """ This is preferential treatment for Greek """
- # keysymdb['combining_tilde'] = 0x342
-
- """ This is for a missing keysym from Markus Kuhn's db """
- keysymdb['dead_stroke'] = 0x338
- """ This is for a missing keysym from Markus Kuhn's db """
- keysymdb['Oslash'] = 0x0d8
- """ This is for a missing keysym from Markus Kuhn's db """
- keysymdb['Ssharp'] = 0x1e9e
-
- """ This is for a missing (recently added) keysym """
- keysymdb['dead_psili'] = 0x313
- """ This is for a missing (recently added) keysym """
- keysymdb['dead_dasia'] = 0x314
-
- """ Allows to import Multi_key sequences """
- keysymdb['Multi_key'] = 0xff20
-
- keysymdb['zerosubscript'] = 0x2080
- keysymdb['onesubscript'] = 0x2081
- keysymdb['twosubscript'] = 0x2082
- keysymdb['threesubscript'] = 0x2083
- keysymdb['foursubscript'] = 0x2084
- keysymdb['fivesubscript'] = 0x2085
- keysymdb['sixsubscript'] = 0x2086
- keysymdb['sevensubscript'] = 0x2087
- keysymdb['eightsubscript'] = 0x2088
- keysymdb['ninesubscript'] = 0x2089
- keysymdb['dead_doublegrave'] = 0x030F
- keysymdb['dead_invertedbreve'] = 0x0311
- keysymdb['dead_belowcomma'] = 0xfe6e
- keysymdb['dead_currency'] = 0xfe6f
- keysymdb['dead_greek'] = 0xfe8c
-
- return keysymdb
-
-def keysymvalue(keysym, file = "n/a", linenum = 0):
- """ Extracts a value from the keysym """
- """ Find the value of keysym, using the data from keysyms """
- """ Use file and linenum to when reporting errors """
- if keysym == "":
- return 0
- if keysymdatabase.has_key(keysym):
- return keysymdatabase[keysym]
- elif keysym[0] == 'U' and match('[0-9a-fA-F]+$', keysym[1:]):
- return atoi(keysym[1:], 16)
- elif keysym[:2] == '0x' and match('[0-9a-fA-F]+$', keysym[2:]):
- return atoi(keysym[2:], 16)
- else:
- print 'keysymvalue: UNKNOWN{%(keysym)s}' % { "keysym": keysym }
- #return -1
- sys.exit(-1)
-
-def keysymunicodevalue(keysym, file = "n/a", linenum = 0):
- """ Extracts a value from the keysym """
- """ Find the value of keysym, using the data from keysyms """
- """ Use file and linenum to when reporting errors """
- if keysym == "":
- return 0
- if keysymunicodedatabase.has_key(keysym):
- return keysymunicodedatabase[keysym]
- elif keysym[0] == 'U' and match('[0-9a-fA-F]+$', keysym[1:]):
- return atoi(keysym[1:], 16)
- elif keysym[:2] == '0x' and match('[0-9a-fA-F]+$', keysym[2:]):
- return atoi(keysym[2:], 16)
- else:
- print 'keysymunicodevalue: UNKNOWN{%(keysym)s}' % { "keysym": keysym }
- sys.exit(-1)
-
-def rename_combining(seq):
- filtered_sequence = []
- for ks in seq:
- if findall('^combining_', ks):
- ks = sub('^combining_', 'dead_', ks)
- if ks == 'dead_double_grave':
- ks = 'dead_doublegrave'
- if ks == 'dead_inverted_breve':
- ks = 'dead_invertedbreve'
- filtered_sequence.append(ks)
- return filtered_sequence
-
-
-keysymunicodedatabase = process_keysymstxt()
-keysymdatabase = process_gdkkeysymsh()
-
-""" Grab and open the compose file from upstream """
-filename_compose = download_file(URL_COMPOSE)
-try:
- composefile = open(filename_compose, 'r')
-except IOError, (errno, strerror):
- print "I/O error(%s): %s" % (errno, strerror)
- sys.exit(-1)
-except:
- print "Unexpected error: ", sys.exc_info()[0]
- sys.exit(-1)
-
-""" Look if there is a lookaside (supplementary) compose file in the current
- directory, and if so, open, then merge with upstream Compose file.
-"""
-xorg_compose_sequences_raw = []
-for seq in composefile.readlines():
- xorg_compose_sequences_raw.append(seq)
-
-try:
- composefile_lookaside = open(FILENAME_COMPOSE_NEGATIVE_SUPPLEMENTARY, 'r')
- for seq in composefile_lookaside.readlines():
- xorg_compose_sequences_raw.remove(seq)
-except IOError, (errno, strerror):
- if opt_verbose:
- print "I/O error(%s): %s" % (errno, strerror)
- print "Did not find negative lookaside compose file. Continuing..."
-except:
- print "Unexpected error: ", sys.exc_info()[0]
- sys.exit(-1)
-
-try:
- composefile_lookaside = open(FILENAME_COMPOSE_SUPPLEMENTARY, 'r')
- for seq in composefile_lookaside.readlines():
- xorg_compose_sequences_raw.append(seq)
-except IOError, (errno, strerror):
- if opt_verbose:
- print "I/O error(%s): %s" % (errno, strerror)
- print "Did not find lookaside compose file. Continuing..."
-except:
- print "Unexpected error: ", sys.exc_info()[0]
- sys.exit(-1)
-
-""" Parse the compose file in xorg_compose_sequences"""
-xorg_compose_sequences = []
-xorg_compose_sequences_algorithmic = []
-linenum_compose = 0
-comment_nest_depth = 0
-for line in xorg_compose_sequences_raw:
- linenum_compose += 1
- line = line.strip()
- if match("^XCOMM", line) or match("^#", line):
- continue
-
- line = sub(r"\/\*([^\*]*|[\*][^/])\*\/", "", line)
-
- comment_start = line.find("/*")
-
- if comment_start >= 0:
- if comment_nest_depth == 0:
- line = line[:comment_start]
- else:
- line = ""
-
- comment_nest_depth += 1
- else:
- comment_end = line.find("*/")
-
- if comment_end >= 0:
- comment_nest_depth -= 1
-
- if comment_nest_depth < 0:
- print "Invalid comment %(linenum_compose)d in %(filename)s: \
- Closing '*/' without opening '/*'" % { "linenum_compose": linenum_compose, "filename": filename_compose }
- exit(-1)
-
- if comment_nest_depth > 0:
- line = ""
- else:
- line = line[comment_end + 2:]
-
- if line is "":
- continue
-
- #line = line[:-1]
- components = split(':', line, 1)
- if len(components) != 2:
- print "Invalid line %(linenum_compose)d in %(filename)s: No sequence\
- /value pair found" % { "linenum_compose": linenum_compose, "filename": filename_compose }
- exit(-1)
- (seq, val ) = split(':', line, 1)
- seq = seq.strip()
- val = val.strip()
- raw_sequence = findall('\w+', seq)
- values = split('\s+', val)
- unichar_temp = split('"', values[0])
- unichar_utf8 = unichar_temp[1]
- if len(values) == 1:
- continue
- codepointstr = values[1]
- if values[1] == '#':
- # No codepoints that are >1 characters yet.
- continue
- if raw_sequence[0][0] == 'U' and match('[0-9a-fA-F]+$', raw_sequence[0][1:]):
- raw_sequence[0] = '0x' + raw_sequence[0][1:]
- if match('^U[0-9a-fA-F]+$', codepointstr):
- codepoint = long(codepointstr[1:], 16)
- elif keysymunicodedatabase.has_key(codepointstr):
- #if keysymdatabase[codepointstr] != keysymunicodedatabase[codepointstr]:
- #print "DIFFERENCE: 0x%(a)X 0x%(b)X" % { "a": keysymdatabase[codepointstr], "b": keysymunicodedatabase[codepointstr]},
- #print raw_sequence, codepointstr
- codepoint = keysymunicodedatabase[codepointstr]
- else:
- unichar = unicode(unichar_utf8, 'utf-8')
- codepoint = ord(unichar)
- sequence = rename_combining(raw_sequence)
- reject_this = False
- for i in sequence:
- if keysymvalue(i) > 0xFFFF:
- reject_this = True
- if opt_plane1:
- print sequence
- break
- if keysymvalue(i) < 0:
- reject_this = True
- break
- if reject_this:
- continue
- if "U0342" in sequence or \
- "U0313" in sequence or \
- "U0314" in sequence or \
- "0x0313" in sequence or \
- "0x0342" in sequence or \
- "0x0314" in sequence:
- continue
- if codepoint > 0xFFFF:
- if opt_verbose:
- print "Ignore the line greater than guint16:\n%s" % line
- continue
- #for i in range(len(sequence)):
- # if sequence[i] == "0x0342":
- # sequence[i] = "dead_tilde"
- if "Multi_key" not in sequence:
- """ Ignore for now >0xFFFF keysyms """
- if codepoint < 0xFFFF:
- original_sequence = copy(sequence)
- stats_sequence = copy(sequence)
- base = sequence.pop()
- basechar = keysymvalue(base, filename_compose, linenum_compose)
-
- if basechar < 0xFFFF:
- counter = 1
- unisequence = []
- not_normalised = True
- skipping_this = False
- for i in range(0, len(sequence)):
- """ If the sequence has dead_tilde and is for Greek, we don't do algorithmically
- because of lack of dead_perispomeni (i.e. conflict)
- """
- bc = basechar
- """if sequence[-1] == "dead_tilde" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff):
- skipping_this = True
- break
- if sequence[-1] == "dead_horn" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff):
- skipping_this = True
- break
- if sequence[-1] == "dead_ogonek" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff):
- skipping_this = True
- break
- if sequence[-1] == "dead_psili":
- sequence[i] = "dead_horn"
- if sequence[-1] == "dead_dasia":
- sequence[-1] = "dead_ogonek"
- """
- unisequence.append(unichr(keysymunicodevalue(sequence.pop(), filename_compose, linenum_compose)))
-
- if skipping_this:
- unisequence = []
- for perm in all_permutations(unisequence):
- # print counter, original_sequence, unichr(basechar) + "".join(perm)
- # print counter, map(unichr, perm)
- normalized = normalize('NFC', unichr(basechar) + "".join(perm))
- if len(normalized) == 1:
- # print 'Base: %(base)s [%(basechar)s], produces [%(unichar)s] (0x%(codepoint)04X)' \
- # % { "base": base, "basechar": unichr(basechar), "unichar": unichar, "codepoint": codepoint },
- # print "Normalized: [%(normalized)s] SUCCESS %(c)d" % { "normalized": normalized, "c": counter }
- stats_sequence_data = map(keysymunicodevalue, stats_sequence)
- stats_sequence_data.append(normalized)
- xorg_compose_sequences_algorithmic.append(stats_sequence_data)
- not_normalised = False
- break;
- counter += 1
- if not_normalised:
- original_sequence.append(codepoint)
- xorg_compose_sequences.append(original_sequence)
- """ print xorg_compose_sequences[-1] """
-
- else:
- print "Error in base char !?!"
- exit(-2)
- else:
- print "OVER", sequence
- exit(-1)
- else:
- sequence.append(codepoint)
- xorg_compose_sequences.append(sequence)
- """ print xorg_compose_sequences[-1] """
-
-def sequence_cmp(x, y):
- if keysymvalue(x[0]) > keysymvalue(y[0]):
- return 1
- elif keysymvalue(x[0]) < keysymvalue(y[0]):
- return -1
- elif len(x) > len(y):
- return 1
- elif len(x) < len(y):
- return -1
- elif keysymvalue(x[1]) > keysymvalue(y[1]):
- return 1
- elif keysymvalue(x[1]) < keysymvalue(y[1]):
- return -1
- elif len(x) < 4:
- return 0
- elif keysymvalue(x[2]) > keysymvalue(y[2]):
- return 1
- elif keysymvalue(x[2]) < keysymvalue(y[2]):
- return -1
- elif len(x) < 5:
- return 0
- elif keysymvalue(x[3]) > keysymvalue(y[3]):
- return 1
- elif keysymvalue(x[3]) < keysymvalue(y[3]):
- return -1
- elif len(x) < 6:
- return 0
- elif keysymvalue(x[4]) > keysymvalue(y[4]):
- return 1
- elif keysymvalue(x[4]) < keysymvalue(y[4]):
- return -1
- else:
- return 0
-
-def sequence_unicode_cmp(x, y):
- if keysymunicodevalue(x[0]) > keysymunicodevalue(y[0]):
- return 1
- elif keysymunicodevalue(x[0]) < keysymunicodevalue(y[0]):
- return -1
- elif len(x) > len(y):
- return 1
- elif len(x) < len(y):
- return -1
- elif keysymunicodevalue(x[1]) > keysymunicodevalue(y[1]):
- return 1
- elif keysymunicodevalue(x[1]) < keysymunicodevalue(y[1]):
- return -1
- elif len(x) < 4:
- return 0
- elif keysymunicodevalue(x[2]) > keysymunicodevalue(y[2]):
- return 1
- elif keysymunicodevalue(x[2]) < keysymunicodevalue(y[2]):
- return -1
- elif len(x) < 5:
- return 0
- elif keysymunicodevalue(x[3]) > keysymunicodevalue(y[3]):
- return 1
- elif keysymunicodevalue(x[3]) < keysymunicodevalue(y[3]):
- return -1
- elif len(x) < 6:
- return 0
- elif keysymunicodevalue(x[4]) > keysymunicodevalue(y[4]):
- return 1
- elif keysymunicodevalue(x[4]) < keysymunicodevalue(y[4]):
- return -1
- else:
- return 0
-
-def sequence_algorithmic_cmp(x, y):
- if len(x) < len(y):
- return -1
- elif len(x) > len(y):
- return 1
- else:
- for i in range(len(x)):
- if x[i] < y[i]:
- return -1
- elif x[i] > y[i]:
- return 1
- return 0
-
-
-xorg_compose_sequences.sort(sequence_cmp)
-
-xorg_compose_sequences_uniqued = []
-first_time = True
-item = None
-for next_item in xorg_compose_sequences:
- if first_time:
- first_time = False
- item = next_item
- if sequence_unicode_cmp(item, next_item) != 0:
- xorg_compose_sequences_uniqued.append(item)
- item = next_item
-
-xorg_compose_sequences = copy(xorg_compose_sequences_uniqued)
-
-counter_multikey = 0
-for item in xorg_compose_sequences:
- if findall('Multi_key', "".join(item[:-1])) != []:
- counter_multikey += 1
-
-xorg_compose_sequences_algorithmic.sort(sequence_algorithmic_cmp)
-xorg_compose_sequences_algorithmic_uniqued = uniq(xorg_compose_sequences_algorithmic)
-
-firstitem = ""
-num_first_keysyms = 0
-zeroes = 0
-num_entries = 0
-num_algorithmic_greek = 0
-for sequence in xorg_compose_sequences:
- if keysymvalue(firstitem) != keysymvalue(sequence[0]):
- firstitem = sequence[0]
- num_first_keysyms += 1
- zeroes += 6 - len(sequence) + 1
- num_entries += 1
-
-for sequence in xorg_compose_sequences_algorithmic_uniqued:
- ch = ord(sequence[-1:][0])
- if ch >= 0x370 and ch <= 0x3ff or ch >= 0x1f00 and ch <= 0x1fff:
- num_algorithmic_greek += 1
-
-
-if opt_algorithmic:
- for sequence in xorg_compose_sequences_algorithmic_uniqued:
- letter = "".join(sequence[-1:])
- print '0x%(cp)04X, %(uni)s, seq: [ <0x%(base)04X>,' % { 'cp': ord(unicode(letter)), 'uni': letter.encode('utf-8'), 'base': sequence[-2] },
- for elem in sequence[:-2]:
- print "<0x%(keysym)04X>," % { 'keysym': elem },
- """ Yeah, verified... We just want to keep the output similar to -u, so we can compare/sort easily """
- print "], recomposed as", letter.encode('utf-8'), "verified"
-
-def num_of_keysyms(seq):
- return len(seq) - 1
-
-def convert_UnotationToHex(arg):
- if isinstance(arg, str):
- if match('^U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]$', arg):
- return sub('^U', '0x', arg)
- return arg
-
-def addprefix_GDK(arg):
- if match('^0x', arg):
- return '%(arg)s, ' % { 'arg': arg }
- elif match('^U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]$', arg.upper()):
- keysym = ''
- for k, c in keysymunicodedatabase.items():
- if c == keysymvalue(arg):
- keysym = k
- break
- if keysym != '':
- return 'GDK_KEY_%(arg)s, ' % { 'arg': keysym }
- else:
- return '0x%(arg)04X, ' % { 'arg': keysymvalue(arg) }
- else:
- return 'GDK_KEY_%(arg)s, ' % { 'arg': arg }
-
-if opt_gtk:
- first_keysym = ""
- sequence = []
- compose_table = []
- ct_second_part = []
- ct_sequence_width = 2
- start_offset = num_first_keysyms * (WIDTHOFCOMPOSETABLE+1)
- we_finished = False
- counter = 0
-
- sequence_iterator = iter(xorg_compose_sequences)
- sequence = sequence_iterator.next()
- while True:
- first_keysym = sequence[0] # Set the first keysym
- compose_table.append([first_keysym, 0, 0, 0, 0, 0])
- while sequence[0] == first_keysym:
- compose_table[counter][num_of_keysyms(sequence)-1] += 1
- try:
- sequence = sequence_iterator.next()
- except StopIteration:
- we_finished = True
- break
- if we_finished:
- break
- counter += 1
-
- ct_index = start_offset
- for line_num in range(len(compose_table)):
- for i in range(WIDTHOFCOMPOSETABLE):
- occurrences = compose_table[line_num][i+1]
- compose_table[line_num][i+1] = ct_index
- ct_index += occurrences * (i+2)
-
- for sequence in xorg_compose_sequences:
- ct_second_part.append(map(convert_UnotationToHex, sequence))
-
- print headerfile_start
- for i in compose_table:
- if opt_gtkexpanded:
- print "0x%(ks)04X," % { "ks": keysymvalue(i[0]) },
- print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i[1:])) }
- elif not match('^0x', i[0]):
- print 'GDK_KEY_%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
- else:
- print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
- for i in ct_second_part:
- if opt_numeric:
- for ks in i[1:][:-1]:
- print '0x%(seq)04X, ' % { 'seq': keysymvalue(ks) },
- print '0x%(cp)04X, ' % { 'cp':i[-1] }
- """
- for ks in i[:-1]:
- print '0x%(seq)04X, ' % { 'seq': keysymvalue(ks) },
- print '0x%(cp)04X, ' % { 'cp':i[-1] }
- """
- elif opt_gtkexpanded:
- print '%(seq)s0x%(cp)04X, ' % { 'seq': "".join(map(addprefix_GDK, i[:-1])), 'cp':i[-1] }
- else:
- print '%(seq)s0x%(cp)04X, ' % { 'seq': "".join(map(addprefix_GDK, i[:-1][1:])), 'cp':i[-1] }
- print headerfile_end
-
-def redecompose(codepoint):
- (name, decomposition, combiningclass) = unicodedatabase[codepoint]
- if decomposition[0] == '' or decomposition[0] == '0':
- return [codepoint]
- if match('<\w+>', decomposition[0]):
- numdecomposition = map(stringtohex, decomposition[1:])
- return map(redecompose, numdecomposition)
- numdecomposition = map(stringtohex, decomposition)
- return map(redecompose, numdecomposition)
-
-def process_unicodedata_file(verbose = False):
- """ Grab from wget http://www.unicode.org/Public/UNIDATA/UnicodeData.txt """
- filename_unicodedatatxt = download_file(URL_UNICODEDATATXT)
- try:
- unicodedatatxt = open(filename_unicodedatatxt, 'r')
- except IOError, (errno, strerror):
- print "I/O error(%s): %s" % (errno, strerror)
- sys.exit(-1)
- except:
- print "Unexpected error: ", sys.exc_info()[0]
- sys.exit(-1)
- for line in unicodedatatxt.readlines():
- if line[0] == "" or line[0] == '#':
- continue
- line = line[:-1]
- uniproperties = split(';', line)
- codepoint = stringtohex(uniproperties[0])
- """ We don't do Plane 1 or CJK blocks. The latter require reading additional files. """
- if codepoint > 0xFFFF or (codepoint >= 0x4E00 and codepoint <= 0x9FFF) or (codepoint >= 0xF900 and codepoint <= 0xFAFF):
- continue
- name = uniproperties[1]
- category = uniproperties[2]
- combiningclass = uniproperties[3]
- decomposition = uniproperties[5]
- unicodedatabase[codepoint] = [name, split('\s+', decomposition), combiningclass]
-
- counter_combinations = 0
- counter_combinations_greek = 0
- counter_entries = 0
- counter_entries_greek = 0
-
- for item in unicodedatabase.keys():
- (name, decomposition, combiningclass) = unicodedatabase[item]
- if decomposition[0] == '':
- continue
- print name, "is empty"
- elif match('<\w+>', decomposition[0]):
- continue
- print name, "has weird", decomposition[0]
- else:
- sequence = map(stringtohex, decomposition)
- chrsequence = map(unichr, sequence)
- normalized = normalize('NFC', "".join(chrsequence))
-
- """ print name, sequence, "Combining: ", "".join(chrsequence), normalized, len(normalized), """
- decomposedsequence = []
- for subseq in map(redecompose, sequence):
- for seqitem in subseq:
- if isinstance(seqitem, list):
- for i in seqitem:
- if isinstance(i, list):
- for j in i:
- decomposedsequence.append(j)
- else:
- decomposedsequence.append(i)
- else:
- decomposedsequence.append(seqitem)
- recomposedchar = normalize('NFC', "".join(map(unichr, decomposedsequence)))
- if len(recomposedchar) == 1 and len(decomposedsequence) > 1:
- counter_entries += 1
- counter_combinations += factorial(len(decomposedsequence)-1)
- ch = item
- if ch >= 0x370 and ch <= 0x3ff or ch >= 0x1f00 and ch <= 0x1fff:
- counter_entries_greek += 1
- counter_combinations_greek += factorial(len(decomposedsequence)-1)
- if verbose:
- print "0x%(cp)04X, %(uni)c, seq:" % { 'cp':item, 'uni':unichr(item) },
- print "[",
- for elem in decomposedsequence:
- print '<0x%(hex)04X>,' % { 'hex': elem },
- print "], recomposed as", recomposedchar,
- if unichr(item) == recomposedchar:
- print "verified"
-
- if verbose == False:
- print "Unicode statistics from UnicodeData.txt"
- print "Number of entries that can be algorithmically produced :", counter_entries
- print " of which are for Greek :", counter_entries_greek
- print "Number of compose sequence combinations requiring :", counter_combinations
- print " of which are for Greek :", counter_combinations_greek
- print "Note: We do not include partial compositions, "
- print "thus the slight discrepancy in the figures"
- print
-
-if opt_unicodedatatxt:
- process_unicodedata_file(True)
-
-if opt_statistics:
- print
- print "Total number of compose sequences (from file) :", len(xorg_compose_sequences) + len(xorg_compose_sequences_algorithmic)
- print " of which can be expressed algorithmically :", len(xorg_compose_sequences_algorithmic)
- print " of which cannot be expressed algorithmically :", len(xorg_compose_sequences)
- print " of which have Multi_key :", counter_multikey
- print
- print "Algorithmic (stats for Xorg Compose file)"
- print "Number of sequences off due to algo from file (len(array)) :", len(xorg_compose_sequences_algorithmic)
- print "Number of sequences off due to algo (uniq(sort(array))) :", len(xorg_compose_sequences_algorithmic_uniqued)
- print " of which are for Greek :", num_algorithmic_greek
- print
- process_unicodedata_file()
- print "Not algorithmic (stats from Xorg Compose file)"
- print "Number of sequences :", len(xorg_compose_sequences)
- print "Flat array looks like :", len(xorg_compose_sequences), "rows of 6 integers (2 bytes per int, or 12 bytes per row)"
- print "Flat array would have taken up (in bytes) :", num_entries * 2 * 6, "bytes from the GTK+ library"
- print "Number of items in flat array :", len(xorg_compose_sequences) * 6
- print " of which are zeroes :", zeroes, "or ", (100 * zeroes) / (len(xorg_compose_sequences) * 6), " per cent"
- print "Number of different first items :", num_first_keysyms
- print "Number of max bytes (if using flat array) :", num_entries * 2 * 6
- print "Number of savings :", zeroes * 2 - num_first_keysyms * 2 * 5
- print
- print "Memory needs if both algorithmic+optimised table in latest Xorg compose file"
- print " :", num_entries * 2 * 6 - zeroes * 2 + num_first_keysyms * 2 * 5
- print
- print "Existing (old) implementation in GTK+"
- print "Number of sequences in old gtkimcontextsimple.c :", 691
- print "The existing (old) implementation in GTK+ takes up :", 691 * 2 * 12, "bytes"
--- /dev/null
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+#
+# compose-parse.py, version 1.4
+#
+# multifunction script that helps manage the compose sequence table in GTK+ (gtk/gtkimcontextsimple.c)
+# the script produces statistics and information about the whole process, run with --help for more.
+#
+# You may need to switch your python installation to utf-8, if you get 'ascii' codec errors.
+#
+# Complain to Simos Xenitellis (simos@gnome.org, http://simos.info/blog) for this craft.
+
+from re import findall, match, split, sub
+from string import atoi
+from unicodedata import normalize
+from urllib import urlretrieve
+from os.path import isfile, getsize
+from copy import copy
+
+import sys
+import getopt
+
+# We grab files off the web, left and right.
+# download_file() stores each one in the current directory under the last
+# path component of its URL and reuses that copy on later runs.
+URL_COMPOSE = 'http://cgit.freedesktop.org/xorg/lib/libX11/plain/nls/en_US.UTF-8/Compose.pre'
+URL_KEYSYMSTXT = "http://www.cl.cam.ac.uk/~mgk25/ucs/keysyms.txt"
+URL_GDKKEYSYMSH = "http://git.gnome.org/browse/gtk%2B/plain/gdk/gdkkeysyms.h"
+URL_UNICODEDATATXT = 'http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt'
+# Optional local files, opened relative to the current directory: the lines
+# of the first are appended to the upstream Compose data, the lines of the
+# second are removed from it.
+FILENAME_COMPOSE_SUPPLEMENTARY = 'gtk-compose-lookaside.txt'
+FILENAME_COMPOSE_NEGATIVE_SUPPLEMENTARY = 'gtk-compose-remove.txt'
+
+# We currently support keysyms of size 2; once upstream xorg gets sorted,
+# we might produce some tables with size 2 and some with size 4.
+SIZEOFINT = 2
+
+# Current max compose sequence length; in case it gets increased.
+WIDTHOFCOMPOSETABLE = 5
+
+# keysym name -> keysym value; rebound further down to the dictionary
+# returned by process_gdkkeysymsh().
+keysymdatabase = {}
+# keysym name -> Unicode code point; rebound further down to the dictionary
+# returned by process_keysymstxt().
+keysymunicodedatabase = {}
+# code point -> [name, decomposition fields, combining class]; filled in by
+# process_unicodedata_file().
+unicodedatabase = {}
+
+headerfile_start = """/* GTK - The GIMP Tool Kit
+ * Copyright (C) 2007, 2008 GNOME Foundation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * File auto-generated from script found at http://bugzilla.gnome.org/show_bug.cgi?id=321896
+ * using the input files
+ * Input : http://cgit.freedesktop.org/xorg/lib/libX11/plain/nls/en_US.UTF-8/Compose.pre
+ * Input : http://www.cl.cam.ac.uk/~mgk25/ucs/keysyms.txt
+ * Input : http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
+ *
+ * This table is optimised for space and requires special handling to access the content.
+ * This table is used solely by http://svn.gnome.org/viewcvs/gtk%2B/trunk/gtk/gtkimcontextsimple.c
+ *
+ * The resulting file is placed at http://svn.gnome.org/viewcvs/gtk%2B/trunk/gtk/gtkimcontextsimpleseqs.h
+ * This file is described in bug report http://bugzilla.gnome.org/show_bug.cgi?id=321896
+ */
+
+/*
+ * Modified by the GTK+ Team and others 2007, 2008. See the AUTHORS
+ * file for a list of people on the GTK+ Team. See the ChangeLog
+ * files for a list of changes. These files are distributed with
+ * GTK+ at ftp://ftp.gtk.org/pub/gtk/.
+ */
+
+#ifndef __GTK_IM_CONTEXT_SIMPLE_SEQS_H__
+#define __GTK_IM_CONTEXT_SIMPLE_SEQS_H__
+
+/* === These are the original comments of the file; we keep for historical purposes ===
+ *
+ * The following table was generated from the X compose tables include with
+ * XFree86 4.0 using a set of Perl scripts. Contact Owen Taylor <otaylor@redhat.com>
+ * to obtain the relevant perl scripts.
+ *
+ * The following compose letter letter sequences conflicted
+ * Dstroke/dstroke and ETH/eth; resolved to Dstroke (Croatian, Vietnamese, Lappish), over
+ * ETH (Icelandic, Faroese, old English, IPA) [ D- -D d- -d ]
+ * Amacron/amacron and ordfeminine; resolved to ordfeminine [ _A A_ a_ _a ]
+ * Amacron/amacron and Atilde/atilde; resolved to atilde [ -A A- a- -a ]
+ * Omacron/Omacron and masculine; resolved to masculine [ _O O_ o_ _o ]
+ * Omacron/omacron and Otilde/atilde; resolved to otilde [ -O O- o- -o ]
+ *
+ * [ Amacron and Omacron are in Latin-4 (Baltic). ordfeminine and masculine are used for
+ * spanish. atilde and otilde are used at least for Portuguese ]
+ *
+ * at and Aring; resolved to Aring [ AA ]
+ * guillemotleft and caron; resolved to guillemotleft [ << ]
+ * ogonek and cedilla; resolved to cedilla [ ,, ]
+ *
+ * This probably should be resolved by first checking an additional set of compose tables
+ * that depend on the locale or selected input method.
+ */
+
+static const guint16 gtk_compose_seqs_compact[] = {"""
+
+headerfile_end = """};
+
+#endif /* __GTK_IM_CONTEXT_SIMPLE_SEQS_H__ */
+"""
+
+def stringtohex(str):
+    """ Converts a string of hexadecimal digits to its integer value.
+
+        str: the digits, e.g. '0301'.
+
+        Returns the integer; raises ValueError if str is not valid base 16. """
+    # int() is the documented replacement for the long-deprecated
+    # string.atoi() and accepts the same (string, base) arguments.
+    return int(str, 16)
+
+def factorial(n):
+    """ Returns n * (n-1) * ... * 2, or 1 for any n that is 1 or less """
+    product = 1
+    while n > 1:
+        product *= n
+        n -= 1
+    return product
+
+def uniq(*args):
+    """ Concatenates the given lists and returns a new list in which every
+        element appears once, in order of first appearance """
+    merged = []
+    for chunk in args:
+        merged.extend(chunk)
+    distinct = []
+    for candidate in merged:
+        if candidate in distinct:
+            continue
+        distinct.append(candidate)
+    return distinct
+
+
+
+def all_permutations(seq):
+    """ Generates every permutation of the items of a list or string.
+        Borrowed from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/252178 """
+    if len(seq) <= 1:
+        yield seq
+        return
+    # Slices (rather than indexing) keep this working for strings and lists
+    head, tail = seq[0:1], seq[1:]
+    for partial in all_permutations(tail):
+        for position in range(len(partial) + 1):
+            yield partial[:position] + head + partial[position:]
+
+# Prints the list of supported command-line options to standard output.
+# The option letters listed here correspond to the getopt() specification
+# used by the option parsing code that follows this function.
+def usage():
+ print """compose-parse available parameters:
+ -h, --help this craft
+ -s, --statistics show overall statistics (both algorithmic, non-algorithmic)
+ -a, --algorithmic show sequences saved with algorithmic optimisation
+ -g, --gtk show entries that go to GTK+
+ -u, --unicodedatatxt show compose sequences derived from UnicodeData.txt (from unicode.org)
+ -v, --verbose show verbose output
+ -p, --plane1 show plane1 compose sequences
+ -n, --numeric when used with --gtk, create file with numeric values only
+ -e, --gtk-expanded when used with --gtk, create file that repeats first column; not usable in GTK+
+
+ Default is to show statistics.
+ """
+
+# Parse the command line. Only getopt's own error is treated as a usage
+# problem; anything else (e.g. KeyboardInterrupt) is left to propagate.
+try:
+    opts, args = getopt.getopt(sys.argv[1:], "pvgashune", ["help", "algorithmic", "statistics", "unicodedatatxt",
+        "stats", "gtk", "verbose", "plane1", "numeric", "gtk-expanded"])
+except getopt.GetoptError:
+    usage()
+    sys.exit(2)
+
+opt_statistics = False
+opt_algorithmic = False
+opt_gtk = False
+opt_unicodedatatxt = False
+opt_verbose = False
+opt_plane1 = False
+opt_numeric = False
+opt_gtkexpanded = False
+
+for o, a in opts:
+    if o in ("-h", "--help"):
+        usage()
+        sys.exit()
+    # "--stats" is accepted by the getopt() call above, so it has to be
+    # recognised here as well; it is an alias of "--statistics".
+    if o in ("-s", "--statistics", "--stats"):
+        opt_statistics = True
+    if o in ("-a", "--algorithmic"):
+        opt_algorithmic = True
+    if o in ("-g", "--gtk"):
+        opt_gtk = True
+    if o in ("-u", "--unicodedatatxt"):
+        opt_unicodedatatxt = True
+    if o in ("-v", "--verbose"):
+        opt_verbose = True
+    if o in ("-p", "--plane1"):
+        opt_plane1 = True
+    if o in ("-n", "--numeric"):
+        opt_numeric = True
+    if o in ("-e", "--gtk-expanded"):
+        opt_gtkexpanded = True
+
+# With no output mode selected, fall back to showing the statistics.
+if not opt_algorithmic and not opt_gtk and not opt_unicodedatatxt:
+    opt_statistics = True
+
+# Progress callback passed to urlretrieve() by download_file().
+# block_size is accepted but not used.
+def download_hook(blocks_transferred, block_size, file_size):
+ """ A download hook to provide some feedback when downloading """
+ # On the first callback (no blocks transferred yet) a heading is printed
+ # in verbose mode, with the size when file_size is positive.
+ if blocks_transferred == 0:
+ if file_size > 0:
+ if opt_verbose:
+ print "Downloading", file_size, "bytes: ",
+ else:
+ if opt_verbose:
+ print "Downloading: ",
+ # Progress mark, flushed immediately so it shows up while downloading.
+ # NOTE(review): presumably emitted on every callback, not only the first
+ # one - confirm against the original indentation.
+ sys.stdout.write('#')
+ sys.stdout.flush()
+
+
+def download_file(url):
+    """ Downloads the file at the given URL into the current directory,
+        unless a non-empty file of the same name is already present.
+
+        url: location of the file; the part after the last '/' is used as
+             the local file name.
+
+        Returns the local file name. Terminates the script with exit
+        status -1 if the download fails. """
+    localfilename = url.split('/')[-1]
+    if not isfile(localfilename) or getsize(localfilename) <= 0:
+        if opt_verbose:
+            print "Downloading ", url, "..."
+        try:
+            urlretrieve(url, localfilename, download_hook)
+        except IOError, err:
+            # urllib can raise IOError instances that do not carry exactly
+            # two arguments (e.g. ContentTooShortError), in which case
+            # unpacking into (errno, strerror) in the except clause would
+            # itself raise. Inspect the attributes instead.
+            if err.errno is not None:
+                print "I/O error(%s): %s" % (err.errno, err.strerror)
+            else:
+                print "I/O error: %s" % (err,)
+            sys.exit(-1)
+        except Exception:
+            print "Unexpected error: ", sys.exc_info()[0]
+            sys.exit(-1)
+        # Terminates the line of progress marks written by download_hook
+        print " done."
+    else:
+        if opt_verbose:
+            print "Using cached file for ", url
+    return localfilename
+
+def process_gdkkeysymsh():
+    """ Reads gdkkeysyms.h (obtained through download_file) and returns a
+        dictionary that maps each keysym name, stripped of its GDK_KEY_
+        prefix, to its numeric value. Entries whose value is 0 are left
+        out. A malformed '#define GDK_KEY_' line terminates the script. """
+    filename_gdkkeysymsh = download_file(URL_GDKKEYSYMSH)
+    try:
+        gdkkeysymsh = open(filename_gdkkeysymsh, 'r')
+    except IOError, (errno, strerror):
+        print "I/O error(%s): %s" % (errno, strerror)
+        sys.exit(-1)
+    except:
+        print "Unexpected error: ", sys.exc_info()[0]
+        sys.exit(-1)
+
+    def reject(number, text, expectation):
+        """ Reports a malformed line and terminates the script """
+        print "Invalid line %(linenum)d in %(filename)s: %(line)s" \
+            % {'linenum': number, 'filename': filename_gdkkeysymsh, 'line': text}
+        print expectation
+        sys.exit(-1)
+
+    # Parse the gdkkeysyms.h file and place contents in keysymdb
+    keysymdb = {}
+    for index, rawline in enumerate(gdkkeysymsh.readlines()):
+        linenum_gdkkeysymsh = index + 1
+        line = rawline.strip()
+        if line == "" or not match('^#define GDK_KEY_', line):
+            continue
+        components = split('\s+', line)
+        if len(components) < 3:
+            reject(linenum_gdkkeysymsh, line, "Was expecting 3 items in the line")
+        if not match('^GDK_KEY_', components[1]):
+            reject(linenum_gdkkeysymsh, line, "Was expecting a keysym starting with GDK_KEY_")
+        if not match('^0x[0-9a-fA-F]+$', components[2]):
+            reject(linenum_gdkkeysymsh, line, "Was expecting a hexadecimal number at the end of the line")
+        unival = long(components[2][2:], 16)
+        if unival != 0:
+            # Drop the 8-character 'GDK_KEY_' prefix from the name
+            keysymdb[components[1][8:]] = unival
+    gdkkeysymsh.close()
+
+    # Patch up the keysymdb with some of our own stuff.
+    # The following patches for keysyms once missing upstream are disabled:
+    ###keysymdb['dead_stroke'] = 0x338
+    ###keysymdb['dead_belowring'] = 0x323
+    ###keysymdb['dead_belowmacron'] = 0x331
+    ###keysymdb['dead_belowcircumflex'] = 0x32d
+    ###keysymdb['dead_belowtilde'] = 0x330
+    ###keysymdb['dead_belowbreve'] = 0x32e
+    ###keysymdb['dead_belowdiaeresis'] = 0x324
+    # This was preferential treatment for Greek; disabled as well:
+    # keysymdb['dead_tilde'] = 0x342
+    # keysymdb['combining_tilde'] = 0x342
+
+    # Fixing VoidSymbol
+    keysymdb['VoidSymbol'] = 0xFFFF
+
+    return keysymdb
+
+# Builds and returns a dictionary mapping keysym names to Unicode code
+# points from keysyms.txt, then adds a fixed set of entries on top.
+# Exits the script if the file cannot be opened or a line has fewer than
+# five whitespace-separated fields.
+def process_keysymstxt():
+ """ Grabs and opens the keysyms.txt file that Markus Kuhn maintains """
+ """ This file keeps a record between keysyms <-> unicode chars """
+ filename_keysymstxt = download_file(URL_KEYSYMSTXT)
+ try:
+ keysymstxt = open(filename_keysymstxt, 'r')
+ except IOError, (errno, strerror):
+ print "I/O error(%s): %s" % (errno, strerror)
+ sys.exit(-1)
+ except:
+ print "Unexpected error: ", sys.exc_info()[0]
+ sys.exit(-1)
+
+ """ Parse the keysyms.txt file and place content in keysymdb """
+ linenum_keysymstxt = 0
+ keysymdb = {}
+ for line in keysymstxt.readlines():
+ linenum_keysymstxt += 1
+ line = line.strip()
+ # Blank lines and '#' comment lines carry no data
+ if line == "" or match('^#', line):
+ continue
+ components = split('\s+', line)
+ if len(components) < 5:
+ print "Invalid line %(linenum)d in %(filename)s: %(line)s'"\
+ % {'linenum': linenum_keysymstxt, 'filename': filename_keysymstxt, 'line': line}
+ print "Was expecting 5 items in the line"
+ sys.exit(-1)
+ # Field 1 holds the code point as U<hex>, field 4 the keysym name
+ if match('^U[0-9a-fA-F]+$', components[1]):
+ unival = long(components[1][1:], 16)
+ # NOTE(review): if the zero test and the assignment below are not nested
+ # under the match above, a line without a U<hex> field would reuse the
+ # previous line's unival - confirm against the original indentation.
+ if unival == 0:
+ continue
+ keysymdb[components[4]] = unival
+ keysymstxt.close()
+
+ """ Patch up the keysymdb with some of our own stuff """
+ """ This is for a missing keysym from the currently upstream file """
+ keysymdb['dead_belowring'] = 0x323
+ keysymdb['dead_belowmacron'] = 0x331
+ keysymdb['dead_belowcircumflex'] = 0x32d
+ keysymdb['dead_belowtilde'] = 0x330
+ keysymdb['dead_belowbreve'] = 0x32e
+ keysymdb['dead_belowdiaeresis'] = 0x324
+
+ """ This is preferential treatment for Greek """
+ """ => we get more savings if used for Greek """
+ # keysymdb['dead_tilde'] = 0x342
+ """ This is preferential treatment for Greek """
+ # keysymdb['combining_tilde'] = 0x342
+
+ """ This is for a missing keysym from Markus Kuhn's db """
+ keysymdb['dead_stroke'] = 0x338
+ """ This is for a missing keysym from Markus Kuhn's db """
+ keysymdb['Oslash'] = 0x0d8
+ """ This is for a missing keysym from Markus Kuhn's db """
+ keysymdb['Ssharp'] = 0x1e9e
+
+ """ This is for a missing (recently added) keysym """
+ keysymdb['dead_psili'] = 0x313
+ """ This is for a missing (recently added) keysym """
+ keysymdb['dead_dasia'] = 0x314
+
+ """ Allows to import Multi_key sequences """
+ keysymdb['Multi_key'] = 0xff20
+
+ # Further fixed entries; these override anything read from the file
+ # under the same name.
+ keysymdb['zerosubscript'] = 0x2080
+ keysymdb['onesubscript'] = 0x2081
+ keysymdb['twosubscript'] = 0x2082
+ keysymdb['threesubscript'] = 0x2083
+ keysymdb['foursubscript'] = 0x2084
+ keysymdb['fivesubscript'] = 0x2085
+ keysymdb['sixsubscript'] = 0x2086
+ keysymdb['sevensubscript'] = 0x2087
+ keysymdb['eightsubscript'] = 0x2088
+ keysymdb['ninesubscript'] = 0x2089
+ keysymdb['dead_doublegrave'] = 0x030F
+ keysymdb['dead_invertedbreve'] = 0x0311
+ keysymdb['dead_belowcomma'] = 0xfe6e
+ keysymdb['dead_currency'] = 0xfe6f
+ keysymdb['dead_greek'] = 0xfe8c
+
+ return keysymdb
+
+def keysymvalue(keysym, file = "n/a", linenum = 0):
+    """ Returns the numeric value of a keysym.
+
+        The empty string yields 0. A name present in keysymdatabase yields
+        its stored value. Otherwise the keysym must be written as U<hex>
+        or 0x<hex>, and the hexadecimal part is returned as an integer.
+        Anything else is reported and terminates the script.
+
+        file and linenum are accepted but not used at present. """
+    if keysym == "":
+        return 0
+    if keysym in keysymdatabase:
+        return keysymdatabase[keysym]
+    # Literal notations, tried in the same order as before: U<hex>, 0x<hex>
+    for prefix in ('U', '0x'):
+        digits = keysym[len(prefix):]
+        if keysym[:len(prefix)] == prefix and match('[0-9a-fA-F]+$', digits):
+            return atoi(digits, 16)
+    print 'keysymvalue: UNKNOWN{%(keysym)s}' % { "keysym": keysym }
+    #return -1
+    sys.exit(-1)
+
+def keysymunicodevalue(keysym, file = "n/a", linenum = 0):
+    """ Returns the Unicode code point associated with a keysym.
+
+        The empty string yields 0. A name present in keysymunicodedatabase
+        yields its stored value. Otherwise the keysym must be written as
+        U<hex> or 0x<hex>, and the hexadecimal part is returned as an
+        integer. Anything else is reported and terminates the script.
+
+        file and linenum are accepted but not used at present. """
+    if keysym == "":
+        return 0
+    if keysym in keysymunicodedatabase:
+        return keysymunicodedatabase[keysym]
+    # Literal notations, tried in the same order as before: U<hex>, 0x<hex>
+    for prefix in ('U', '0x'):
+        digits = keysym[len(prefix):]
+        if keysym[:len(prefix)] == prefix and match('[0-9a-fA-F]+$', digits):
+            return atoi(digits, 16)
+    print 'keysymunicodevalue: UNKNOWN{%(keysym)s}' % { "keysym": keysym }
+    sys.exit(-1)
+
+# Returns a copy of seq in which every keysym name starting with
+# 'combining_' starts with 'dead_' instead, and the underscore spellings
+# 'dead_double_grave' / 'dead_inverted_breve' are replaced by
+# 'dead_doublegrave' / 'dead_invertedbreve'. seq itself is not modified.
+def rename_combining(seq):
+ filtered_sequence = []
+ for ks in seq:
+ if findall('^combining_', ks):
+ ks = sub('^combining_', 'dead_', ks)
+ # NOTE(review): presumably these two spelling fixes apply to every keysym,
+ # not only to those renamed just above - confirm the original nesting.
+ if ks == 'dead_double_grave':
+ ks = 'dead_doublegrave'
+ if ks == 'dead_inverted_breve':
+ ks = 'dead_invertedbreve'
+ filtered_sequence.append(ks)
+ return filtered_sequence
+
+
+# Build the two keysym tables before any Compose line is interpreted;
+# keysymvalue() and keysymunicodevalue() read these module-level names.
+keysymunicodedatabase = process_keysymstxt()
+keysymdatabase = process_gdkkeysymsh()
+
+""" Grab and open the compose file from upstream """
+filename_compose = download_file(URL_COMPOSE)
+try:
+ composefile = open(filename_compose, 'r')
+except IOError, (errno, strerror):
+ print "I/O error(%s): %s" % (errno, strerror)
+ sys.exit(-1)
+except:
+ print "Unexpected error: ", sys.exc_info()[0]
+ sys.exit(-1)
+
+""" Look if there is a lookaside (supplementary) compose file in the current
+ directory, and if so, open, then merge with upstream Compose file.
+"""
+# NOTE(review): composefile and both lookaside files are never closed in
+# this section.
+xorg_compose_sequences_raw = []
+for seq in composefile.readlines():
+ xorg_compose_sequences_raw.append(seq)
+
+# Negative lookaside: every line of this file is removed from the raw list.
+# NOTE(review): list.remove() raises ValueError when the line is not in the
+# list; that lands in the bare except below and ends the script with
+# "Unexpected error".
+try:
+ composefile_lookaside = open(FILENAME_COMPOSE_NEGATIVE_SUPPLEMENTARY, 'r')
+ for seq in composefile_lookaside.readlines():
+ xorg_compose_sequences_raw.remove(seq)
+except IOError, (errno, strerror):
+ if opt_verbose:
+ print "I/O error(%s): %s" % (errno, strerror)
+ print "Did not find negative lookaside compose file. Continuing..."
+except:
+ print "Unexpected error: ", sys.exc_info()[0]
+ sys.exit(-1)
+
+# Positive lookaside: every line of this file is appended to the raw list.
+# A missing file is not an error in either case.
+try:
+ composefile_lookaside = open(FILENAME_COMPOSE_SUPPLEMENTARY, 'r')
+ for seq in composefile_lookaside.readlines():
+ xorg_compose_sequences_raw.append(seq)
+except IOError, (errno, strerror):
+ if opt_verbose:
+ print "I/O error(%s): %s" % (errno, strerror)
+ print "Did not find lookaside compose file. Continuing..."
+except:
+ print "Unexpected error: ", sys.exc_info()[0]
+ sys.exit(-1)
+
+""" Parse the compose file in xorg_compose_sequences"""
+# Results of the parse below:
+#   xorg_compose_sequences             - keysym names followed by the
+#                                        resulting code point
+#   xorg_compose_sequences_algorithmic - Unicode values of the keysyms
+#                                        followed by the NFC-normalized
+#                                        character
+xorg_compose_sequences = []
+xorg_compose_sequences_algorithmic = []
+linenum_compose = 0
+comment_nest_depth = 0
+for line in xorg_compose_sequences_raw:
+ linenum_compose += 1
+ line = line.strip()
+ # XCOMM and '#' lines are comments
+ if match("^XCOMM", line) or match("^#", line):
+ continue
+
+ # Drop C-style comments that open and close on this same line
+ line = sub(r"\/\*([^\*]*|[\*][^/])\*\/", "", line)
+
+ # What follows tracks comments spanning several lines via
+ # comment_nest_depth.
+ comment_start = line.find("/*")
+
+ if comment_start >= 0:
+ if comment_nest_depth == 0:
+ line = line[:comment_start]
+ else:
+ line = ""
+
+ comment_nest_depth += 1
+ else:
+ comment_end = line.find("*/")
+
+ if comment_end >= 0:
+ comment_nest_depth -= 1
+
+ if comment_nest_depth < 0:
+ print "Invalid comment %(linenum_compose)d in %(filename)s: \
+ Closing '*/' without opening '/*'" % { "linenum_compose": linenum_compose, "filename": filename_compose }
+ # NOTE(review): exit() here and below is the interactive helper from the
+ # site module; the rest of the script uses sys.exit().
+ exit(-1)
+
+ if comment_nest_depth > 0:
+ line = ""
+ else:
+ # NOTE(review): if this also runs for lines without any "*/",
+ # comment_end is -1 and the slice is line[1:] - confirm.
+ line = line[comment_end + 2:]
+
+ # NOTE(review): 'is' tests object identity; an equality test (== "") is
+ # presumably what is meant.
+ if line is "":
+ continue
+
+ #line = line[:-1]
+ components = split(':', line, 1)
+ if len(components) != 2:
+ print "Invalid line %(linenum_compose)d in %(filename)s: No sequence\
+ /value pair found" % { "linenum_compose": linenum_compose, "filename": filename_compose }
+ exit(-1)
+ # Left of the first ':' are the keysyms, right of it the result
+ (seq, val ) = split(':', line, 1)
+ seq = seq.strip()
+ val = val.strip()
+ raw_sequence = findall('\w+', seq)
+ values = split('\s+', val)
+ # The text between the first pair of double quotes of the first field
+ unichar_temp = split('"', values[0])
+ unichar_utf8 = unichar_temp[1]
+ if len(values) == 1:
+ continue
+ codepointstr = values[1]
+ if values[1] == '#':
+ # No codepoints that are >1 characters yet.
+ continue
+ # A leading keysym written as U<hex> is rewritten as 0x<hex>
+ if raw_sequence[0][0] == 'U' and match('[0-9a-fA-F]+$', raw_sequence[0][1:]):
+ raw_sequence[0] = '0x' + raw_sequence[0][1:]
+ # Resolve the code point: U<hex> notation first, then a keysym name,
+ # and finally the quoted character itself decoded from UTF-8
+ if match('^U[0-9a-fA-F]+$', codepointstr):
+ codepoint = long(codepointstr[1:], 16)
+ elif keysymunicodedatabase.has_key(codepointstr):
+ #if keysymdatabase[codepointstr] != keysymunicodedatabase[codepointstr]:
+ #print "DIFFERENCE: 0x%(a)X 0x%(b)X" % { "a": keysymdatabase[codepointstr], "b": keysymunicodedatabase[codepointstr]},
+ #print raw_sequence, codepointstr
+ codepoint = keysymunicodedatabase[codepointstr]
+ else:
+ unichar = unicode(unichar_utf8, 'utf-8')
+ codepoint = ord(unichar)
+ sequence = rename_combining(raw_sequence)
+ # Reject sequences with a keysym value above 0xFFFF or below 0
+ reject_this = False
+ for i in sequence:
+ if keysymvalue(i) > 0xFFFF:
+ reject_this = True
+ if opt_plane1:
+ print sequence
+ break
+ if keysymvalue(i) < 0:
+ reject_this = True
+ break
+ if reject_this:
+ continue
+ # Sequences that use U+0342, U+0313 or U+0314 directly are skipped
+ if "U0342" in sequence or \
+ "U0313" in sequence or \
+ "U0314" in sequence or \
+ "0x0313" in sequence or \
+ "0x0342" in sequence or \
+ "0x0314" in sequence:
+ continue
+ if codepoint > 0xFFFF:
+ if opt_verbose:
+ print "Ignore the line greater than guint16:\n%s" % line
+ continue
+ #for i in range(len(sequence)):
+ # if sequence[i] == "0x0342":
+ # sequence[i] = "dead_tilde"
+ # Sequences without Multi_key are tested for being reproducible by
+ # Unicode normalization; Multi_key sequences go straight to the table.
+ if "Multi_key" not in sequence:
+ """ Ignore for now >0xFFFF keysyms """
+ if codepoint < 0xFFFF:
+ original_sequence = copy(sequence)
+ stats_sequence = copy(sequence)
+ # The last keysym is the base character, the ones before it are
+ # the marks applied to it
+ base = sequence.pop()
+ basechar = keysymvalue(base, filename_compose, linenum_compose)
+
+ if basechar < 0xFFFF:
+ counter = 1
+ unisequence = []
+ not_normalised = True
+ skipping_this = False
+ for i in range(0, len(sequence)):
+ """ If the sequence has dead_tilde and is for Greek, we don't do algorithmically
+ because of lack of dead_perispomeni (i.e. conflict)
+ """
+ bc = basechar
+ """if sequence[-1] == "dead_tilde" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff):
+ skipping_this = True
+ break
+ if sequence[-1] == "dead_horn" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff):
+ skipping_this = True
+ break
+ if sequence[-1] == "dead_ogonek" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff):
+ skipping_this = True
+ break
+ if sequence[-1] == "dead_psili":
+ sequence[i] = "dead_horn"
+ if sequence[-1] == "dead_dasia":
+ sequence[-1] = "dead_ogonek"
+ """
+ unisequence.append(unichr(keysymunicodevalue(sequence.pop(), filename_compose, linenum_compose)))
+
+ if skipping_this:
+ unisequence = []
+ # If the base followed by the marks, in some order, normalizes to
+ # a single character, the sequence counts as algorithmic
+ for perm in all_permutations(unisequence):
+ # print counter, original_sequence, unichr(basechar) + "".join(perm)
+ # print counter, map(unichr, perm)
+ normalized = normalize('NFC', unichr(basechar) + "".join(perm))
+ if len(normalized) == 1:
+ # print 'Base: %(base)s [%(basechar)s], produces [%(unichar)s] (0x%(codepoint)04X)' \
+ # % { "base": base, "basechar": unichr(basechar), "unichar": unichar, "codepoint": codepoint },
+ # print "Normalized: [%(normalized)s] SUCCESS %(c)d" % { "normalized": normalized, "c": counter }
+ stats_sequence_data = map(keysymunicodevalue, stats_sequence)
+ stats_sequence_data.append(normalized)
+ xorg_compose_sequences_algorithmic.append(stats_sequence_data)
+ not_normalised = False
+ break;
+ counter += 1
+ # Otherwise the sequence has to be stored in the table
+ if not_normalised:
+ original_sequence.append(codepoint)
+ xorg_compose_sequences.append(original_sequence)
+ """ print xorg_compose_sequences[-1] """
+
+ else:
+ print "Error in base char !?!"
+ exit(-2)
+ else:
+ print "OVER", sequence
+ exit(-1)
+ else:
+ sequence.append(codepoint)
+ xorg_compose_sequences.append(sequence)
+ """ print xorg_compose_sequences[-1] """
+
+def sequence_cmp(x, y):
+    """ Comparison function for entries of xorg_compose_sequences.
+
+        Entries are ordered by the keysym value of their first element,
+        then by length, then by the keysym values of the elements at
+        indexes 1 to 4. The last element of an entry (the code point) is
+        not compared. Returns -1, 0 or 1. """
+    def ordering(left, right):
+        return (left > right) - (left < right)
+
+    verdict = ordering(keysymvalue(x[0]), keysymvalue(y[0]))
+    if verdict != 0:
+        return verdict
+    verdict = ordering(len(x), len(y))
+    if verdict != 0:
+        return verdict
+    for index in range(1, 5):
+        verdict = ordering(keysymvalue(x[index]), keysymvalue(y[index]))
+        if verdict != 0:
+            return verdict
+        # Stop before reaching the trailing code point
+        if len(x) < index + 3:
+            return 0
+    return 0
+
+def sequence_unicode_cmp(x, y):
+    """ Same ordering as sequence_cmp, except that the elements are
+        compared by the Unicode value of each keysym (keysymunicodevalue)
+        instead of its keysym value. Returns -1, 0 or 1. """
+    def ordering(left, right):
+        return (left > right) - (left < right)
+
+    verdict = ordering(keysymunicodevalue(x[0]), keysymunicodevalue(y[0]))
+    if verdict != 0:
+        return verdict
+    verdict = ordering(len(x), len(y))
+    if verdict != 0:
+        return verdict
+    for index in range(1, 5):
+        verdict = ordering(keysymunicodevalue(x[index]), keysymunicodevalue(y[index]))
+        if verdict != 0:
+            return verdict
+        # Stop before reaching the trailing code point
+        if len(x) < index + 3:
+            return 0
+    return 0
+
+def sequence_algorithmic_cmp(x, y):
+    """ Comparison function for entries of xorg_compose_sequences_algorithmic:
+        shorter entries sort first, entries of equal length are compared
+        element by element. Returns -1, 0 or 1. """
+    if len(x) != len(y):
+        if len(x) < len(y):
+            return -1
+        return 1
+    for left, right in zip(x, y):
+        if left < right:
+            return -1
+        if left > right:
+            return 1
+    return 0
+
+
+# Sort the table entries so that equal sequences end up next to each other.
+xorg_compose_sequences.sort(sequence_cmp)
+
+# Collapse each run of entries that compare equal under
+# sequence_unicode_cmp() into the first entry of the run.
+xorg_compose_sequences_uniqued = []
+first_time = True
+item = None
+for next_item in xorg_compose_sequences:
+    if first_time:
+        first_time = False
+        item = next_item
+    if sequence_unicode_cmp(item, next_item) != 0:
+        xorg_compose_sequences_uniqued.append(item)
+        item = next_item
+# The loop only stores an entry once a different one follows it, so the
+# entry still pending at the end has to be stored here; without this the
+# last compose sequence of the sorted list is silently lost.
+if item is not None:
+    xorg_compose_sequences_uniqued.append(item)
+
+xorg_compose_sequences = copy(xorg_compose_sequences_uniqued)
+
+# Count the entries that contain Multi_key among their keysyms (the last
+# element, the code point, is left out of the test).
+counter_multikey = 0
+for item in xorg_compose_sequences:
+    if findall('Multi_key', "".join(item[:-1])) != []:
+        counter_multikey += 1
+
+xorg_compose_sequences_algorithmic.sort(sequence_algorithmic_cmp)
+xorg_compose_sequences_algorithmic_uniqued = uniq(xorg_compose_sequences_algorithmic)
+
+# Figures used by the statistics output and by the --gtk table layout.
+firstitem = ""
+num_first_keysyms = 0
+zeroes = 0
+num_entries = 0
+num_algorithmic_greek = 0
+for sequence in xorg_compose_sequences:
+    # The list is sorted by first keysym, so a change of value marks the
+    # start of a new group.
+    if keysymvalue(firstitem) != keysymvalue(sequence[0]):
+        firstitem = sequence[0]
+        num_first_keysyms += 1
+    # Unused slots this entry would leave in a flat row
+    zeroes += 6 - len(sequence) + 1
+    num_entries += 1
+
+for sequence in xorg_compose_sequences_algorithmic_uniqued:
+    # The last element is the composed character; count it when it falls
+    # in U+0370..U+03FF or U+1F00..U+1FFF.
+    ch = ord(sequence[-1:][0])
+    if ch >= 0x370 and ch <= 0x3ff or ch >= 0x1f00 and ch <= 0x1fff:
+        num_algorithmic_greek += 1
+
+
+# --algorithmic: list every distinct sequence that was found to be
+# reproducible by normalization, one line per sequence. In each entry the
+# last element is the composed character, the one before it the base and
+# the remaining leading elements the other values of the sequence.
+if opt_algorithmic:
+ for sequence in xorg_compose_sequences_algorithmic_uniqued:
+ letter = "".join(sequence[-1:])
+ print '0x%(cp)04X, %(uni)s, seq: [ <0x%(base)04X>,' % { 'cp': ord(unicode(letter)), 'uni': letter.encode('utf-8'), 'base': sequence[-2] },
+ for elem in sequence[:-2]:
+ print "<0x%(keysym)04X>," % { 'keysym': elem },
+ """ Yeah, verified... We just want to keep the output similar to -u, so we can compare/sort easily """
+ print "], recomposed as", letter.encode('utf-8'), "verified"
+
+def num_of_keysyms(seq):
+    """ Returns how many keysyms an entry holds, i.e. its length without
+        the trailing code point """
+    keysym_count = len(seq) - 1
+    return keysym_count
+
+def convert_UnotationToHex(arg):
+    """ Rewrites a string of the form U followed by exactly four uppercase
+        hexadecimal digits as 0x followed by those digits. Any other value,
+        string or not, is returned unchanged. """
+    if not isinstance(arg, str):
+        return arg
+    if not match('^U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]$', arg):
+        return arg
+    return sub('^U', '0x', arg)
+
+def addprefix_GDK(arg):
+    """ Formats one keysym for the generated C table and returns it
+        followed by ', '.
+
+        A 0x literal is passed through as is. A U<four hex digits> keysym
+        becomes GDK_KEY_<name> when keysymunicodedatabase has a name with
+        that value, or a 0x literal otherwise. Any other name simply
+        receives the GDK_KEY_ prefix. """
+    if match('^0x', arg):
+        return '%(arg)s, ' % { 'arg': arg }
+    if not match('^U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]$', arg.upper()):
+        return 'GDK_KEY_%(arg)s, ' % { 'arg': arg }
+    # Reverse lookup: find a keysym name carrying this value
+    keysym = ''
+    for name, value in keysymunicodedatabase.items():
+        if value == keysymvalue(arg):
+            keysym = name
+            break
+    if keysym == '':
+        return '0x%(arg)04X, ' % { 'arg': keysymvalue(arg) }
+    return 'GDK_KEY_%(arg)s, ' % { 'arg': keysym }
+
+# --gtk: print the compact table between headerfile_start and
+# headerfile_end. The output has two parts: an index with one row per
+# distinct first keysym, and then the sequences themselves.
+if opt_gtk:
+ first_keysym = ""
+ sequence = []
+ compose_table = []
+ ct_second_part = []
+ ct_sequence_width = 2
+ # The index occupies one row of WIDTHOFCOMPOSETABLE+1 values per first
+ # keysym; the sequence data starts right after it
+ start_offset = num_first_keysyms * (WIDTHOFCOMPOSETABLE+1)
+ we_finished = False
+ counter = 0
+
+ # First pass: for each first keysym, count how many sequences there are
+ # of each length. Column n of a row holds the count for n+1 keysyms.
+ sequence_iterator = iter(xorg_compose_sequences)
+ sequence = sequence_iterator.next()
+ while True:
+ first_keysym = sequence[0] # Set the first keysym
+ compose_table.append([first_keysym, 0, 0, 0, 0, 0])
+ while sequence[0] == first_keysym:
+ compose_table[counter][num_of_keysyms(sequence)-1] += 1
+ try:
+ sequence = sequence_iterator.next()
+ except StopIteration:
+ we_finished = True
+ break
+ if we_finished:
+ break
+ counter += 1
+
+ # Second pass: replace each count by the running offset at which that
+ # group starts; a group takes occurrences * (i+2) values
+ ct_index = start_offset
+ for line_num in range(len(compose_table)):
+ for i in range(WIDTHOFCOMPOSETABLE):
+ occurrences = compose_table[line_num][i+1]
+ compose_table[line_num][i+1] = ct_index
+ ct_index += occurrences * (i+2)
+
+ for sequence in xorg_compose_sequences:
+ ct_second_part.append(map(convert_UnotationToHex, sequence))
+
+ print headerfile_start
+ # Index rows
+ for i in compose_table:
+ if opt_gtkexpanded:
+ print "0x%(ks)04X," % { "ks": keysymvalue(i[0]) },
+ print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i[1:])) }
+ elif not match('^0x', i[0]):
+ print 'GDK_KEY_%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
+ else:
+ print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
+ # Sequence rows; except with --gtk-expanded, the first keysym is left
+ # out of each row
+ for i in ct_second_part:
+ if opt_numeric:
+ for ks in i[1:][:-1]:
+ print '0x%(seq)04X, ' % { 'seq': keysymvalue(ks) },
+ print '0x%(cp)04X, ' % { 'cp':i[-1] }
+ """
+ for ks in i[:-1]:
+ print '0x%(seq)04X, ' % { 'seq': keysymvalue(ks) },
+ print '0x%(cp)04X, ' % { 'cp':i[-1] }
+ """
+ elif opt_gtkexpanded:
+ print '%(seq)s0x%(cp)04X, ' % { 'seq': "".join(map(addprefix_GDK, i[:-1])), 'cp':i[-1] }
+ else:
+ print '%(seq)s0x%(cp)04X, ' % { 'seq': "".join(map(addprefix_GDK, i[:-1][1:])), 'cp':i[-1] }
+ print headerfile_end
+
+def redecompose(codepoint):
+    """ Recursively decomposes a code point using unicodedatabase.
+
+        Returns [codepoint] when the entry has no decomposition (first
+        field empty or '0'). Otherwise returns a list with one nested
+        result per code point of the decomposition; a leading <tag> field
+        is skipped. """
+    (name, decomposition, combiningclass) = unicodedatabase[codepoint]
+    leading = decomposition[0]
+    if leading in ('', '0'):
+        return [codepoint]
+    parts = decomposition
+    if match('<\w+>', leading):
+        # The first field is a compatibility tag, not a code point
+        parts = decomposition[1:]
+    numdecomposition = [stringtohex(part) for part in parts]
+    return [redecompose(number) for number in numdecomposition]
+
+# Loads UnicodeData.txt into unicodedatabase and counts the characters
+# whose full decomposition recomposes (NFC) to a single character.
+# verbose: when true, each such character is listed; when false, only the
+#          summary figures are printed.
+# Nothing is returned. Exits the script if the file cannot be opened.
+def process_unicodedata_file(verbose = False):
+ """ Grab from wget http://www.unicode.org/Public/UNIDATA/UnicodeData.txt """
+ filename_unicodedatatxt = download_file(URL_UNICODEDATATXT)
+ try:
+ unicodedatatxt = open(filename_unicodedatatxt, 'r')
+ except IOError, (errno, strerror):
+ print "I/O error(%s): %s" % (errno, strerror)
+ sys.exit(-1)
+ except:
+ print "Unexpected error: ", sys.exc_info()[0]
+ sys.exit(-1)
+ # NOTE(review): unicodedatatxt is never closed in this function.
+ for line in unicodedatatxt.readlines():
+ if line[0] == "" or line[0] == '#':
+ continue
+ # Drop the trailing newline
+ line = line[:-1]
+ # Fields used: 0 code point, 1 name, 2 category, 3 combining class,
+ # 5 decomposition
+ uniproperties = split(';', line)
+ codepoint = stringtohex(uniproperties[0])
+ """ We don't do Plane 1 or CJK blocks. The latter require reading additional files. """
+ if codepoint > 0xFFFF or (codepoint >= 0x4E00 and codepoint <= 0x9FFF) or (codepoint >= 0xF900 and codepoint <= 0xFAFF):
+ continue
+ name = uniproperties[1]
+ category = uniproperties[2]
+ combiningclass = uniproperties[3]
+ decomposition = uniproperties[5]
+ unicodedatabase[codepoint] = [name, split('\s+', decomposition), combiningclass]
+
+ counter_combinations = 0
+ counter_combinations_greek = 0
+ counter_entries = 0
+ counter_entries_greek = 0
+
+ for item in unicodedatabase.keys():
+ (name, decomposition, combiningclass) = unicodedatabase[item]
+ # Entries without a decomposition, or with a tagged (<...>) one, are
+ # skipped.
+ # NOTE(review): the print statements that follow each 'continue' appear
+ # to be unreachable leftovers.
+ if decomposition[0] == '':
+ continue
+ print name, "is empty"
+ elif match('<\w+>', decomposition[0]):
+ continue
+ print name, "has weird", decomposition[0]
+ else:
+ sequence = map(stringtohex, decomposition)
+ chrsequence = map(unichr, sequence)
+ normalized = normalize('NFC', "".join(chrsequence))
+
+ """ print name, sequence, "Combining: ", "".join(chrsequence), normalized, len(normalized), """
+ # Flatten the nested lists returned by redecompose(), up to three
+ # levels deep, into one list of code points
+ decomposedsequence = []
+ for subseq in map(redecompose, sequence):
+ for seqitem in subseq:
+ if isinstance(seqitem, list):
+ for i in seqitem:
+ if isinstance(i, list):
+ for j in i:
+ decomposedsequence.append(j)
+ else:
+ decomposedsequence.append(i)
+ else:
+ decomposedsequence.append(seqitem)
+ recomposedchar = normalize('NFC', "".join(map(unichr, decomposedsequence)))
+ # Count the entry when several code points recompose to one character;
+ # the combinations figure grows by the factorial of the number of code
+ # points after the first
+ if len(recomposedchar) == 1 and len(decomposedsequence) > 1:
+ counter_entries += 1
+ counter_combinations += factorial(len(decomposedsequence)-1)
+ ch = item
+ # Same counters for U+0370..U+03FF and U+1F00..U+1FFF
+ if ch >= 0x370 and ch <= 0x3ff or ch >= 0x1f00 and ch <= 0x1fff:
+ counter_entries_greek += 1
+ counter_combinations_greek += factorial(len(decomposedsequence)-1)
+ if verbose:
+ print "0x%(cp)04X, %(uni)c, seq:" % { 'cp':item, 'uni':unichr(item) },
+ print "[",
+ for elem in decomposedsequence:
+ print '<0x%(hex)04X>,' % { 'hex': elem },
+ print "], recomposed as", recomposedchar,
+ if unichr(item) == recomposedchar:
+ print "verified"
+
+ if verbose == False:
+ print "Unicode statistics from UnicodeData.txt"
+ print "Number of entries that can be algorithmically produced :", counter_entries
+ print " of which are for Greek :", counter_entries_greek
+ print "Number of compose sequence combinations requiring :", counter_combinations
+ print " of which are for Greek :", counter_combinations_greek
+ print "Note: We do not include partial compositions, "
+ print "thus the slight discrepancy in the figures"
+ print
+
+# Optional mode: process UnicodeData.txt on its own. The True argument is
+# presumably the function's verbose flag (signature is outside this hunk) -- TODO confirm.
+if opt_unicodedatatxt:
+ process_unicodedata_file(True)
+
+# Optional mode: print summary statistics about the parsed compose sequences.
+# The lists and counters read here (xorg_compose_sequences*, counter_multikey,
+# num_algorithmic_greek, num_entries, zeroes, num_first_keysyms) are defined
+# outside this hunk.
+if opt_statistics:
+ print
+ print "Total number of compose sequences (from file) :", len(xorg_compose_sequences) + len(xorg_compose_sequences_algorithmic)
+ print " of which can be expressed algorithmically :", len(xorg_compose_sequences_algorithmic)
+ print " of which cannot be expressed algorithmically :", len(xorg_compose_sequences)
+ print " of which have Multi_key :", counter_multikey
+ print
+ print "Algorithmic (stats for Xorg Compose file)"
+ print "Number of sequences off due to algo from file (len(array)) :", len(xorg_compose_sequences_algorithmic)
+ print "Number of sequences off due to algo (uniq(sort(array))) :", len(xorg_compose_sequences_algorithmic_uniqued)
+ print " of which are for Greek :", num_algorithmic_greek
+ print
+ # Called without arguments here; presumably this selects the non-verbose path,
+ # which prints the "Unicode statistics from UnicodeData.txt" summary -- TODO confirm default.
+ process_unicodedata_file()
+ print "Not algorithmic (stats from Xorg Compose file)"
+ print "Number of sequences :", len(xorg_compose_sequences)
+ # The literal 6 and 2 below are presumably WIDTHOFCOMPOSETABLE + 1 and SIZEOFINT,
+ # hard-coded rather than derived from those constants -- TODO confirm.
+ print "Flat array looks like :", len(xorg_compose_sequences), "rows of 6 integers (2 bytes per int, or 12 bytes per row)"
+ print "Flat array would have taken up (in bytes) :", num_entries * 2 * 6, "bytes from the GTK+ library"
+ print "Number of items in flat array :", len(xorg_compose_sequences) * 6
+ # NOTE(review): raises ZeroDivisionError when xorg_compose_sequences is empty;
+ # with integer operands, Python 2 "/" also truncates the percentage.
+ print " of which are zeroes :", zeroes, "or ", (100 * zeroes) / (len(xorg_compose_sequences) * 6), " per cent"
+ print "Number of different first items :", num_first_keysyms
+ print "Number of max bytes (if using flat array) :", num_entries * 2 * 6
+ print "Number of savings :", zeroes * 2 - num_first_keysyms * 2 * 5
+ print
+ print "Memory needs if both algorithmic+optimised table in latest Xorg compose file"
+ print " :", num_entries * 2 * 6 - zeroes * 2 + num_first_keysyms * 2 * 5
+ print
+ # 691 is a hard-coded sequence count for the old table; the factor 12 is
+ # presumably the old per-row width in 2-byte integers -- TODO confirm.
+ print "Existing (old) implementation in GTK+"
+ print "Number of sequences in old gtkimcontextsimple.c :", 691
+ print "The existing (old) implementation in GTK+ takes up :", 691 * 2 * 12, "bytes"
--- /dev/null
+#
+# This file contains the compose sequences that GTK+ shipped until GTK+ 2.12
+# but that are no longer found in the upstream Compose file at X.Org.
+# When updating gtkimcontextsimpleseqs.h with compose-parse.py,
+# we include this file as well. There are 15 conflicts currently
+# in the compose sequences, and we currently favour the sequences from
+# this file (against the upstream X.Org file). For more, see
+# http://bugzilla.gnome.org/show_bug.cgi?id=557420
+#
+
+<Greek_accentdieresis> <Greek_iota> : "ΐ" U0390
+<Greek_accentdieresis> <Greek_upsilon> : "ΰ" U03B0
+<Multi_key> <B> <period> : "Ḃ" U1E02
+<Multi_key> <b> <period> : "ḃ" U1E03
+<Multi_key> <D> <period> : "Ḋ" U1E0A
+<Multi_key> <d> <period> : "ḋ" U1E0B
+<Multi_key> <F> <period> : "Ḟ" U1E1E
+<Multi_key> <f> <period> : "ḟ" U1E1F
+<Multi_key> <M> <period> : "Ṁ" U1E40
+<Multi_key> <S> <period> : "Ṡ" U1E60
+<Multi_key> <P> <period> : "Ṗ" U1E56
+<Multi_key> <p> <period> : "ṗ" U1E57
+<Multi_key> <s> <period> : "ṡ" U1E61
+<Multi_key> <T> <period> : "Ṫ" U1E6A
+<Multi_key> <t> <period> : "ṫ" U1E6B
+<Multi_key> <e> <period> : "ė" U0117
+<Multi_key> <C> <bar> : "¢" U00A2
+<Multi_key> <bar> <C> : "¢" U00A2
+<Multi_key> <minus> <l> : "£" U00A3
+<Multi_key> <equal> <l> : "£" U00A3
+<Multi_key> <L> <equal> : "£" U00A3
+<Multi_key> <l> <minus> : "£" U00A3
+<Multi_key> <l> <equal> : "£" U00A3
+<Multi_key> <0> <X> : "¤" U00A4
+<Multi_key> <0> <x> : "¤" U00A4
+<Multi_key> <O> <X> : "¤" U00A4
+<Multi_key> <O> <x> : "¤" U00A4
+<Multi_key> <X> <0> : "¤" U00A4
+<Multi_key> <X> <O> : "¤" U00A4
+<Multi_key> <X> <o> : "¤" U00A4
+<Multi_key> <o> <X> : "¤" U00A4
+<Multi_key> <x> <0> : "¤" U00A4
+<Multi_key> <x> <O> : "¤" U00A4
+<Multi_key> <minus> <Y> : "¥" U00A5
+<Multi_key> <minus> <y> : "¥" U00A5
+<Multi_key> <equal> <y> : "¥" U00A5
+<Multi_key> <Y> <minus> : "¥" U00A5
+<Multi_key> <y> <minus> : "¥" U00A5
+<Multi_key> <y> <equal> : "¥" U00A5
+<Multi_key> <0> <S> : "§" U00A7
+<Multi_key> <0> <s> : "§" U00A7
+<Multi_key> <O> <S> : "§" U00A7
+<Multi_key> <S> <exclam> : "§" U00A7
+<Multi_key> <S> <0> : "§" U00A7
+<Multi_key> <S> <O> : "§" U00A7
+<Multi_key> <s> <exclam> : "§" U00A7
+<Multi_key> <s> <0> : "§" U00A7
+<Multi_key> <quotedbl> <quotedbl> : "¨" U00A8
+<Multi_key> <parenleft> <c> : "©" U00A9
+<Multi_key> <0> <C> : "©" U00A9
+<Multi_key> <0> <c> : "©" U00A9
+<Multi_key> <C> <0> : "©" U00A9
+<Multi_key> <C> <O> : "©" U00A9
+<Multi_key> <C> <o> : "©" U00A9
+<Multi_key> <c> <0> : "©" U00A9
+<Multi_key> <A> <underscore> : "ª" U00AA
+<Multi_key> <a> <underscore> : "ª" U00AA
+<Multi_key> <C> <comma> : "Ç" U00C7
+<Multi_key> <minus> <minus> <space> : "" U00AD
+<Multi_key> <parenleft> <r> : "®" U00AE
+<Multi_key> <R> <O> : "®" U00AE
+<Multi_key> <minus> <asciicircum> : "¯" U00AF
+<Multi_key> <asciicircum> <minus> : "¯" U00AF
+<Multi_key> <asciicircum> <underscore> : "¯" U00AF
+<Multi_key> <underscore> <asciicircum> : "¯" U00AF
+<Multi_key> <underscore> <underscore> : "¯" U00AF
+<Multi_key> <asterisk> <0> : "°" U00B0
+<Multi_key> <0> <asterisk> : "°" U00B0
+<Multi_key> <0> <asciicircum> : "°" U00B0
+<Multi_key> <minus> <plus> : "±" U00B1
+<Multi_key> <2> <S> : "²" U00B2
+<Multi_key> <2> <asciicircum> : "²" U00B2
+<Multi_key> <2> <s> : "²" U00B2
+<Multi_key> <S> <2> : "²" U00B2
+<Multi_key> <s> <2> : "²" U00B2
+<Multi_key> <3> <S> : "³" U00B3
+<Multi_key> <3> <asciicircum> : "³" U00B3
+<Multi_key> <3> <s> : "³" U00B3
+<Multi_key> <S> <3> : "³" U00B3
+<Multi_key> <s> <3> : "³" U00B3
+<Multi_key> <apostrophe> <apostrophe> : "´" U00B4
+<Multi_key> <slash> <U> : "µ" U00B5
+<Multi_key> <slash> <u> : "µ" U00B5
+<Multi_key> <U> <slash> : "µ" U00B5
+<Multi_key> <u> <slash> : "µ" U00B5
+<Multi_key> <exclam> <P> : "¶" U00B6
+<Multi_key> <exclam> <p> : "¶" U00B6
+<Multi_key> <period> <asciicircum> : "·" U00B7
+<Multi_key> <asciicircum> <period> : "·" U00B7
+<Multi_key> <comma> <comma> : "¸" U00B8
+<Multi_key> <1> <S> : "¹" U00B9
+<Multi_key> <1> <asciicircum> : "¹" U00B9
+<Multi_key> <1> <s> : "¹" U00B9
+<Multi_key> <S> <1> : "¹" U00B9
+<Multi_key> <s> <1> : "¹" U00B9
+<Multi_key> <O> <underscore> : "º" U00BA
+<Multi_key> <o> <underscore> : "º" U00BA
+<Multi_key> <A> <grave> : "À" U00C0
+<Multi_key> <A> <apostrophe> : "Á" U00C1
+<Multi_key> <A> <acute> : "Á" U00C1
+<Multi_key> <greater> <A> : "Â" U00C2
+<Multi_key> <A> <greater> : "Â" U00C2
+<Multi_key> <A> <asciicircum> : "Â" U00C2
+<Multi_key> <minus> <A> : "Ã" U00C3
+<Multi_key> <A> <minus> : "Ã" U00C3
+<Multi_key> <A> <asciitilde> : "Ã" U00C3
+<Multi_key> <A> <quotedbl> : "Ä" U00C4
+<Multi_key> <A> <diaeresis> : "Ä" U00C4
+<Multi_key> <diaeresis> <A> : "Ä" U00C4
+<Multi_key> <asterisk> <A> : "Å" U00C5
+<Multi_key> <A> <asterisk> : "Å" U00C5
+<Multi_key> <A> <A> : "Å" U00C5
+<Multi_key> <space> <less> : "ˇ" U02C7
+<Multi_key> <less> <space> : "ˇ" U02C7
+<Multi_key> <E> <grave> : "È" U00C8
+<Multi_key> <E> <apostrophe> : "É" U00C9
+<Multi_key> <E> <acute> : "É" U00C9
+<Multi_key> <greater> <E> : "Ê" U00CA
+<Multi_key> <E> <greater> : "Ê" U00CA
+<Multi_key> <E> <asciicircum> : "Ê" U00CA
+<Multi_key> <E> <quotedbl> : "Ë" U00CB
+<Multi_key> <E> <diaeresis> : "Ë" U00CB
+<Multi_key> <diaeresis> <E> : "Ë" U00CB
+<Multi_key> <I> <grave> : "Ì" U00CC
+<Multi_key> <I> <apostrophe> : "Í" U00CD
+<Multi_key> <I> <acute> : "Í" U00CD
+<Multi_key> <greater> <I> : "Î" U00CE
+<Multi_key> <I> <greater> : "Î" U00CE
+<Multi_key> <I> <asciicircum> : "Î" U00CE
+<Multi_key> <I> <quotedbl> : "Ï" U00CF
+<Multi_key> <I> <diaeresis> : "Ï" U00CF
+<Multi_key> <diaeresis> <I> : "Ï" U00CF
+<Multi_key> <minus> <N> : "Ñ" U00D1
+<Multi_key> <N> <minus> : "Ñ" U00D1
+<Multi_key> <N> <asciitilde> : "Ñ" U00D1
+<Multi_key> <O> <grave> : "Ò" U00D2
+<Multi_key> <O> <apostrophe> : "Ó" U00D3
+<Multi_key> <O> <acute> : "Ó" U00D3
+<Multi_key> <greater> <O> : "Ô" U00D4
+<Multi_key> <O> <greater> : "Ô" U00D4
+<Multi_key> <O> <asciicircum> : "Ô" U00D4
+<Multi_key> <minus> <O> : "Õ" U00D5
+<Multi_key> <O> <minus> : "Õ" U00D5
+<Multi_key> <O> <asciitilde> : "Õ" U00D5
+<Multi_key> <O> <quotedbl> : "Ö" U00D6
+<Multi_key> <O> <diaeresis> : "Ö" U00D6
+<Multi_key> <diaeresis> <O> : "Ö" U00D6
+<Multi_key> <space> <parenleft> : "˘" U02D8
+<Multi_key> <parenleft> <space> : "˘" U02D8
+<Multi_key> <U> <grave> : "Ù" U00D9
+<Multi_key> <U> <apostrophe> : "Ú" U00DA
+<Multi_key> <U> <acute> : "Ú" U00DA
+<Multi_key> <greater> <U> : "Û" U00DB
+<Multi_key> <U> <greater> : "Û" U00DB
+<Multi_key> <U> <asciicircum> : "Û" U00DB
+<Multi_key> <U> <quotedbl> : "Ü" U00DC
+<Multi_key> <U> <diaeresis> : "Ü" U00DC
+<Multi_key> <diaeresis> <U> : "Ü" U00DC
+<Multi_key> <Y> <apostrophe> : "Ý" U00DD
+<Multi_key> <Y> <acute> : "Ý" U00DD
+<Multi_key> <a> <grave> : "à" U00E0
+<Multi_key> <a> <apostrophe> : "á" U00E1
+<Multi_key> <a> <acute> : "á" U00E1
+<Multi_key> <greater> <a> : "â" U00E2
+<Multi_key> <a> <greater> : "â" U00E2
+<Multi_key> <a> <asciicircum> : "â" U00E2
+<Multi_key> <minus> <a> : "ā" U0101
+<Multi_key> <a> <minus> : "ā" U0101
+<Multi_key> <a> <asciitilde> : "ã" U00E3
+<Multi_key> <a> <quotedbl> : "ä" U00E4
+<Multi_key> <a> <diaeresis> : "ä" U00E4
+<Multi_key> <diaeresis> <a> : "ä" U00E4
+<Multi_key> <asterisk> <a> : "å" U00E5
+<Multi_key> <a> <asterisk> : "å" U00E5
+<Multi_key> <a> <a> : "å" U00E5
+<Multi_key> <c> <comma> : "ç" U00E7
+<Multi_key> <e> <grave> : "è" U00E8
+<Multi_key> <e> <apostrophe> : "é" U00E9
+<Multi_key> <e> <acute> : "é" U00E9
+<Multi_key> <greater> <e> : "ê" U00EA
+<Multi_key> <e> <greater> : "ê" U00EA
+<Multi_key> <e> <asciicircum> : "ê" U00EA
+<Multi_key> <e> <quotedbl> : "ë" U00EB
+<Multi_key> <e> <diaeresis> : "ë" U00EB
+<Multi_key> <diaeresis> <e> : "ë" U00EB
+<Multi_key> <i> <grave> : "ì" U00EC
+<Multi_key> <i> <apostrophe> : "í" U00ED
+<Multi_key> <i> <acute> : "í" U00ED
+<Multi_key> <greater> <i> : "î" U00EE
+<Multi_key> <i> <greater> : "î" U00EE
+<Multi_key> <i> <asciicircum> : "î" U00EE
+<Multi_key> <i> <quotedbl> : "ï" U00EF
+<Multi_key> <i> <diaeresis> : "ï" U00EF
+<Multi_key> <diaeresis> <i> : "ï" U00EF
+<Multi_key> <minus> <n> : "ñ" U00F1
+<Multi_key> <n> <minus> : "ñ" U00F1
+<Multi_key> <n> <asciitilde> : "ñ" U00F1
+<Multi_key> <o> <grave> : "ò" U00F2
+<Multi_key> <o> <apostrophe> : "ó" U00F3
+<Multi_key> <o> <acute> : "ó" U00F3
+<Multi_key> <greater> <o> : "ô" U00F4
+<Multi_key> <o> <greater> : "ô" U00F4
+<Multi_key> <o> <asciicircum> : "ô" U00F4
+<Multi_key> <minus> <o> : "ō" U014D
+<Multi_key> <o> <minus> : "ō" U014D
+<Multi_key> <o> <asciitilde> : "õ" U00F5
+<Multi_key> <o> <quotedbl> : "ö" U00F6
+<Multi_key> <o> <diaeresis> : "ö" U00F6
+<Multi_key> <diaeresis> <o> : "ö" U00F6
+<Multi_key> <o> <slash> : "ø" U00F8
+<Multi_key> <u> <grave> : "ù" U00F9
+<Multi_key> <u> <apostrophe> : "ú" U00FA
+<Multi_key> <u> <acute> : "ú" U00FA
+<Multi_key> <greater> <u> : "û" U00FB
+<Multi_key> <u> <greater> : "û" U00FB
+<Multi_key> <u> <asciicircum> : "û" U00FB
+<Multi_key> <u> <quotedbl> : "ü" U00FC
+<Multi_key> <u> <diaeresis> : "ü" U00FC
+<Multi_key> <diaeresis> <u> : "ü" U00FC
+<Multi_key> <y> <apostrophe> : "ý" U00FD
+<Multi_key> <y> <acute> : "ý" U00FD
+<Multi_key> <y> <quotedbl> : "ÿ" U00FF
+<Multi_key> <y> <diaeresis> : "ÿ" U00FF
+<Multi_key> <diaeresis> <y> : "ÿ" U00FF
+<Multi_key> <parenleft> <A> : "Ă" U0102
+<Multi_key> <A> <parenleft> : "Ă" U0102
+<Multi_key> <parenleft> <a> : "ă" U0103
+<Multi_key> <a> <parenleft> : "ă" U0103
+<Multi_key> <comma> <A> : "Ą" U0104
+<Multi_key> <A> <comma> : "Ą" U0104
+<Multi_key> <comma> <a> : "ą" U0105
+<Multi_key> <a> <comma> : "ą" U0105
+<Multi_key> <C> <apostrophe> : "Ć" U0106
+<Multi_key> <c> <apostrophe> : "ć" U0107
+<Multi_key> <C> <period> : "Ċ" U010A
+<Multi_key> <c> <period> : "ċ" U010B
+<Multi_key> <less> <C> : "Č" U010C
+<Multi_key> <C> <less> : "Č" U010C
+<Multi_key> <less> <c> : "č" U010D
+<Multi_key> <c> <less> : "č" U010D
+<Multi_key> <less> <D> : "Ď" U010E
+<Multi_key> <D> <less> : "Ď" U010E
+<Multi_key> <less> <d> : "ď" U010F
+<Multi_key> <d> <less> : "ď" U010F
+<Multi_key> <minus> <D> : "Đ" U0110
+<Multi_key> <D> <minus> : "Đ" U0110
+<Multi_key> <minus> <d> : "đ" U0111
+<Multi_key> <minus> <E> : "Ē" U0112
+<Multi_key> <E> <minus> : "Ē" U0112
+<Multi_key> <E> <underscore> : "Ē" U0112
+<Multi_key> <minus> <e> : "ē" U0113
+<Multi_key> <e> <minus> : "ē" U0113
+<Multi_key> <e> <underscore> : "ē" U0113
+<Multi_key> <E> <period> : "Ė" U0116
+<Multi_key> <E> <comma> : "Ę" U0118
+<Multi_key> <e> <comma> : "ę" U0119
+<Multi_key> <less> <E> : "Ě" U011A
+<Multi_key> <E> <less> : "Ě" U011A
+<Multi_key> <less> <e> : "ě" U011B
+<Multi_key> <e> <less> : "ě" U011B
+<Multi_key> <parenleft> <G> : "Ğ" U011E
+<Multi_key> <G> <parenleft> : "Ğ" U011E
+<Multi_key> <G> <U> : "Ğ" U011E
+<Multi_key> <G> <breve> : "Ğ" U011E
+<Multi_key> <breve> <G> : "Ğ" U011E
+<Multi_key> <parenleft> <g> : "ğ" U011F
+<Multi_key> <g> <parenleft> : "ğ" U011F
+<Multi_key> <g> <U> : "ğ" U011F
+<Multi_key> <g> <breve> : "ğ" U011F
+<Multi_key> <breve> <g> : "ğ" U011F
+<Multi_key> <G> <period> : "Ġ" U0120
+<Multi_key> <g> <period> : "ġ" U0121
+<Multi_key> <G> <comma> : "Ģ" U0122
+<Multi_key> <g> <comma> : "ģ" U0123
+<Multi_key> <I> <asciitilde> : "Ĩ" U0128
+<Multi_key> <i> <asciitilde> : "ĩ" U0129
+<Multi_key> <minus> <I> : "Ī" U012A
+<Multi_key> <I> <minus> : "Ī" U012A
+<Multi_key> <I> <underscore> : "Ī" U012A
+<Multi_key> <minus> <i> : "ī" U012B
+<Multi_key> <i> <minus> : "ī" U012B
+<Multi_key> <i> <underscore> : "ī" U012B
+<Multi_key> <comma> <I> : "Į" U012E
+<Multi_key> <I> <comma> : "Į" U012E
+<Multi_key> <I> <period> : "İ" U0130
+<Multi_key> <period> <i> : "ı" U0131
+<Multi_key> <K> <comma> : "Ķ" U0136
+<Multi_key> <k> <comma> : "ķ" U0137
+<Multi_key> <L> <apostrophe> : "Ĺ" U0139
+<Multi_key> <l> <apostrophe> : "ĺ" U013A
+<Multi_key> <L> <comma> : "Ļ" U013B
+<Multi_key> <l> <comma> : "ļ" U013C
+<Multi_key> <less> <L> : "Ľ" U013D
+<Multi_key> <L> <less> : "Ľ" U013D
+<Multi_key> <less> <l> : "ľ" U013E
+<Multi_key> <l> <less> : "ľ" U013E
+<Multi_key> <L> <slash> : "Ł" U0141
+<Multi_key> <l> <slash> : "ł" U0142
+<Multi_key> <N> <apostrophe> : "Ń" U0143
+<Multi_key> <n> <apostrophe> : "ń" U0144
+<Multi_key> <N> <comma> : "Ņ" U0145
+<Multi_key> <n> <comma> : "ņ" U0146
+<Multi_key> <less> <N> : "Ň" U0147
+<Multi_key> <N> <less> : "Ň" U0147
+<Multi_key> <less> <n> : "ň" U0148
+<Multi_key> <n> <less> : "ň" U0148
+<Multi_key> <R> <apostrophe> : "Ŕ" U0154
+<Multi_key> <r> <apostrophe> : "ŕ" U0155
+<Multi_key> <R> <comma> : "Ŗ" U0156
+<Multi_key> <r> <comma> : "ŗ" U0157
+<Multi_key> <less> <R> : "Ř" U0158
+<Multi_key> <R> <less> : "Ř" U0158
+<Multi_key> <less> <r> : "ř" U0159
+<Multi_key> <r> <less> : "ř" U0159
+<Multi_key> <S> <apostrophe> : "Ś" U015A
+<Multi_key> <s> <apostrophe> : "ś" U015B
+<Multi_key> <O> <slash> : "Ø" U00D8
+<Multi_key> <S> <comma> : "Ş" U015E
+<Multi_key> <S> <cedilla> : "Ş" U015E
+<Multi_key> <s> <comma> : "ş" U015F
+<Multi_key> <s> <cedilla> : "ş" U015F
+<Multi_key> <less> <S> : "Š" U0160
+<Multi_key> <S> <less> : "Š" U0160
+<Multi_key> <less> <s> : "š" U0161
+<Multi_key> <s> <less> : "š" U0161
+<Multi_key> <less> <T> : "Ť" U0164
+<Multi_key> <T> <less> : "Ť" U0164
+<Multi_key> <less> <t> : "ť" U0165
+<Multi_key> <t> <less> : "ť" U0165
+<Multi_key> <T> <minus> : "Ŧ" U0166
+<Multi_key> <T> <slash> : "Ŧ" U0166
+<Multi_key> <t> <minus> : "ŧ" U0167
+<Multi_key> <t> <slash> : "ŧ" U0167
+<Multi_key> <U> <asciitilde> : "Ũ" U0168
+<Multi_key> <u> <asciitilde> : "ũ" U0169
+<Multi_key> <minus> <U> : "Ū" U016A
+<Multi_key> <U> <minus> : "Ū" U016A
+<Multi_key> <U> <underscore> : "Ū" U016A
+<Multi_key> <minus> <u> : "ū" U016B
+<Multi_key> <u> <minus> : "ū" U016B
+<Multi_key> <u> <underscore> : "ū" U016B
+<Multi_key> <asterisk> <U> : "Ů" U016E
+<Multi_key> <U> <asterisk> : "Ů" U016E
+<Multi_key> <asterisk> <u> : "ů" U016F
+<Multi_key> <u> <asterisk> : "ů" U016F
+<Multi_key> <comma> <U> : "Ų" U0172
+<Multi_key> <U> <comma> : "Ų" U0172
+<Multi_key> <comma> <u> : "ų" U0173
+<Multi_key> <u> <comma> : "ų" U0173
+<Multi_key> <W> <asciicircum> : "Ŵ" U0174
+<Multi_key> <w> <asciicircum> : "ŵ" U0175
+<Multi_key> <Y> <asciicircum> : "Ŷ" U0176
+<Multi_key> <y> <asciicircum> : "ŷ" U0177
+<Multi_key> <Y> <quotedbl> : "Ÿ" U0178
+<Multi_key> <Y> <diaeresis> : "Ÿ" U0178
+<Multi_key> <diaeresis> <Y> : "Ÿ" U0178
+<Multi_key> <Z> <apostrophe> : "Ź" U0179
+<Multi_key> <z> <apostrophe> : "ź" U017A
+<Multi_key> <Z> <period> : "Ż" U017B
+<Multi_key> <z> <period> : "ż" U017C
+<Multi_key> <less> <Z> : "Ž" U017D
+<Multi_key> <Z> <less> : "Ž" U017D
+<Multi_key> <v> <Z> : "Ž" U017D
+<Multi_key> <less> <z> : "ž" U017E
+<Multi_key> <v> <z> : "ž" U017E
+<Multi_key> <z> <less> : "ž" U017E
+<dead_acute> <dead_diaeresis> <space> : "΅" U0385
+<dead_diaeresis> <dead_acute> <space> : "΅" U0385
+<Multi_key> <quotedbl> <apostrophe> <space> : "΅" U0385
+<Multi_key> <apostrophe> <quotedbl> <space> : "΅" U0385
+<Multi_key> <Greek_ALPHA> <apostrophe> : "Ά" U0386
+<Multi_key> <m> <period> : "ṁ" U1E41
+<Multi_key> <Greek_EPSILON> <apostrophe> : "Έ" U0388
+<Multi_key> <Greek_ETA> <apostrophe> : "Ή" U0389
+<Multi_key> <Greek_IOTA> <apostrophe> : "Ί" U038A
+<Multi_key> <Greek_OMICRON> <apostrophe> : "Ό" U038C
+<Multi_key> <Greek_UPSILON> <apostrophe> : "Ύ" U038E
+<Multi_key> <Greek_OMEGA> <apostrophe> : "Ώ" U038F
+<dead_diaeresis> <dead_acute> <Greek_iota> : "ΐ" U0390
+<Multi_key> <quotedbl> <apostrophe> <Greek_iota> : "ΐ" U0390
+<Multi_key> <comma> <i> : "į" U012F
+<Multi_key> <i> <comma> : "į" U012F
+<Multi_key> <Greek_IOTA> <quotedbl> : "Ϊ" U03AA
+<Multi_key> <Greek_UPSILON> <quotedbl> : "Ϋ" U03AB
+<Multi_key> <Greek_alpha> <apostrophe> : "ά" U03AC
+<Multi_key> <Greek_epsilon> <apostrophe> : "έ" U03AD
+<Multi_key> <Greek_eta> <apostrophe> : "ή" U03AE
+<Multi_key> <Greek_iota> <apostrophe> : "ί" U03AF
+<dead_diaeresis> <dead_acute> <Greek_upsilon> : "ΰ" U03B0
+<Multi_key> <quotedbl> <apostrophe> <Greek_upsilon> : "ΰ" U03B0
+<Multi_key> <Greek_iota> <quotedbl> : "ϊ" U03CA
+<Multi_key> <Greek_upsilon> <quotedbl> : "ϋ" U03CB
+<Multi_key> <Greek_omicron> <apostrophe> : "ό" U03CC
+<Multi_key> <Greek_upsilon> <apostrophe> : "ύ" U03CD
+<Multi_key> <Greek_omega> <apostrophe> : "ώ" U03CE
+++ /dev/null
-#
-# This file contains the compose sequences that GTK+ used to have until GTK+ 2.12
-# but are not found anymore at the upstream Compose file at X.Org.
-# When updating gtkimcontextsimpleseqs.h with compose-parse.py,
-# we include this file as well. There are 15 conflicts currently
-# in the compose sequences, and we currently favour the sequences from
-# this file (against the upstream X.Org file). For more, see
-# http://bugzilla.gnome.org/show_bug.cgi?id=557420
-#
-
-<Greek_accentdieresis> <Greek_iota> : "ἴ" U0390
-<Greek_accentdieresis> <Greek_upsilon> : "ΐ" U03B0
-<Multi_key> <B> <period> : "Ḃ" U1E02
-<Multi_key> <b> <period> : "ḃ" U1E03
-<Multi_key> <D> <period> : "Ḋ" U1E0A
-<Multi_key> <d> <period> : "ḋ" U1E0B
-<Multi_key> <F> <period> : "Ḟ" U1E1E
-<Multi_key> <f> <period> : "ḟ" U1E1F
-<Multi_key> <M> <period> : "Ṁ" U1E40
-<Multi_key> <S> <period> : "Ṡ" U1E60
-<Multi_key> <P> <period> : "Ṗ" U1E56
-<Multi_key> <p> <period> : "ṗ" U1E57
-<Multi_key> <s> <period> : "ṡ" U1E61
-<Multi_key> <T> <period> : "Ṫ" U1E6A
-<Multi_key> <t> <period> : "ṫ" U1E6B
-<Multi_key> <e> <period> : "ė" U0117
-<Multi_key> <C> <bar> : "¢" U00A2
-<Multi_key> <bar> <C> : "¢" U00A2
-<Multi_key> <minus> <l> : "£" U00A3
-<Multi_key> <equal> <l> : "£" U00A3
-<Multi_key> <L> <equal> : "£" U00A3
-<Multi_key> <l> <minus> : "£" U00A3
-<Multi_key> <l> <equal> : "£" U00A3
-<Multi_key> <0> <X> : "¤" U00A4
-<Multi_key> <0> <x> : "¤" U00A4
-<Multi_key> <O> <X> : "¤" U00A4
-<Multi_key> <O> <x> : "¤" U00A4
-<Multi_key> <X> <0> : "¤" U00A4
-<Multi_key> <X> <O> : "¤" U00A4
-<Multi_key> <X> <o> : "¤" U00A4
-<Multi_key> <o> <X> : "¤" U00A4
-<Multi_key> <x> <0> : "¤" U00A4
-<Multi_key> <x> <O> : "¤" U00A4
-<Multi_key> <minus> <Y> : "¥" U00A5
-<Multi_key> <minus> <y> : "¥" U00A5
-<Multi_key> <equal> <y> : "¥" U00A5
-<Multi_key> <Y> <minus> : "¥" U00A5
-<Multi_key> <y> <minus> : "¥" U00A5
-<Multi_key> <y> <equal> : "¥" U00A5
-<Multi_key> <0> <S> : "§" U00A7
-<Multi_key> <0> <s> : "§" U00A7
-<Multi_key> <O> <S> : "§" U00A7
-<Multi_key> <S> <exclam> : "§" U00A7
-<Multi_key> <S> <0> : "§" U00A7
-<Multi_key> <S> <O> : "§" U00A7
-<Multi_key> <s> <exclam> : "§" U00A7
-<Multi_key> <s> <0> : "§" U00A7
-<Multi_key> <quotedbl> <quotedbl> : "¨" U00A8
-<Multi_key> <parenleft> <c> : "©" U00A9
-<Multi_key> <0> <C> : "©" U00A9
-<Multi_key> <0> <c> : "©" U00A9
-<Multi_key> <C> <0> : "©" U00A9
-<Multi_key> <C> <O> : "©" U00A9
-<Multi_key> <C> <o> : "©" U00A9
-<Multi_key> <c> <0> : "©" U00A9
-<Multi_key> <A> <underscore> : "ª" U00AA
-<Multi_key> <a> <underscore> : "ª" U00AA
-<Multi_key> <C> <comma> : "Ç" U00C7
-<Multi_key> <minus> <minus> <space> : "" U00AD
-<Multi_key> <parenleft> <r> : "®" U00AE
-<Multi_key> <R> <O> : "®" U00AE
-<Multi_key> <minus> <asciicircum> : "¯" U00AF
-<Multi_key> <asciicircum> <minus> : "¯" U00AF
-<Multi_key> <asciicircum> <underscore> : "¯" U00AF
-<Multi_key> <underscore> <asciicircum> : "¯" U00AF
-<Multi_key> <underscore> <underscore> : "¯" U00AF
-<Multi_key> <asterisk> <0> : "°" U00B0
-<Multi_key> <0> <asterisk> : "°" U00B0
-<Multi_key> <0> <asciicircum> : "°" U00B0
-<Multi_key> <minus> <plus> : "±" U00B1
-<Multi_key> <2> <S> : "²" U00B2
-<Multi_key> <2> <asciicircum> : "²" U00B2
-<Multi_key> <2> <s> : "²" U00B2
-<Multi_key> <S> <2> : "²" U00B2
-<Multi_key> <s> <2> : "²" U00B2
-<Multi_key> <3> <S> : "³" U00B3
-<Multi_key> <3> <asciicircum> : "³" U00B3
-<Multi_key> <3> <s> : "³" U00B3
-<Multi_key> <S> <3> : "³" U00B3
-<Multi_key> <s> <3> : "³" U00B3
-<Multi_key> <apostrophe> <apostrophe> : "´" U00B4
-<Multi_key> <slash> <U> : "µ" U00B5
-<Multi_key> <slash> <u> : "µ" U00B5
-<Multi_key> <U> <slash> : "µ" U00B5
-<Multi_key> <u> <slash> : "µ" U00B5
-<Multi_key> <exclam> <P> : "¶" U00B6
-<Multi_key> <exclam> <p> : "¶" U00B6
-<Multi_key> <period> <asciicircum> : "·" U00B7
-<Multi_key> <asciicircum> <period> : "·" U00B7
-<Multi_key> <comma> <comma> : "¸" U00B8
-<Multi_key> <1> <S> : "¹" U00B9
-<Multi_key> <1> <asciicircum> : "¹" U00B9
-<Multi_key> <1> <s> : "¹" U00B9
-<Multi_key> <S> <1> : "¹" U00B9
-<Multi_key> <s> <1> : "¹" U00B9
-<Multi_key> <O> <underscore> : "º" U00BA
-<Multi_key> <o> <underscore> : "º" U00BA
-<Multi_key> <A> <grave> : "À" U00C0
-<Multi_key> <A> <apostrophe> : "Á" U00C1
-<Multi_key> <A> <acute> : "Á" U00C1
-<Multi_key> <greater> <A> : "Â" U00C2
-<Multi_key> <A> <greater> : "Â" U00C2
-<Multi_key> <A> <asciicircum> : "Â" U00C2
-<Multi_key> <minus> <A> : "Ã" U00C3
-<Multi_key> <A> <minus> : "Ã" U00C3
-<Multi_key> <A> <asciitilde> : "Ã" U00C3
-<Multi_key> <A> <quotedbl> : "Ä" U00C4
-<Multi_key> <A> <diaeresis> : "Ä" U00C4
-<Multi_key> <diaeresis> <A> : "Ä" U00C4
-<Multi_key> <asterisk> <A> : "Å" U00C5
-<Multi_key> <A> <asterisk> : "Å" U00C5
-<Multi_key> <A> <A> : "Å" U00C5
-<Multi_key> <space> <less> : "ˇ" U02C7
-<Multi_key> <less> <space> : "ˇ" U02C7
-<Multi_key> <E> <grave> : "È" U00C8
-<Multi_key> <E> <apostrophe> : "É" U00C9
-<Multi_key> <E> <acute> : "É" U00C9
-<Multi_key> <greater> <E> : "Ê" U00CA
-<Multi_key> <E> <greater> : "Ê" U00CA
-<Multi_key> <E> <asciicircum> : "Ê" U00CA
-<Multi_key> <E> <quotedbl> : "Ë" U00CB
-<Multi_key> <E> <diaeresis> : "Ë" U00CB
-<Multi_key> <diaeresis> <E> : "Ë" U00CB
-<Multi_key> <I> <grave> : "Ì" U00CC
-<Multi_key> <I> <apostrophe> : "Í" U00CD
-<Multi_key> <I> <acute> : "Í" U00CD
-<Multi_key> <greater> <I> : "Î" U00CE
-<Multi_key> <I> <greater> : "Î" U00CE
-<Multi_key> <I> <asciicircum> : "Î" U00CE
-<Multi_key> <I> <quotedbl> : "Ï" U00CF
-<Multi_key> <I> <diaeresis> : "Ï" U00CF
-<Multi_key> <diaeresis> <I> : "Ï" U00CF
-<Multi_key> <minus> <N> : "Ñ" U00D1
-<Multi_key> <N> <minus> : "Ñ" U00D1
-<Multi_key> <N> <asciitilde> : "Ñ" U00D1
-<Multi_key> <O> <grave> : "Ò" U00D2
-<Multi_key> <O> <apostrophe> : "Ó" U00D3
-<Multi_key> <O> <acute> : "Ó" U00D3
-<Multi_key> <greater> <O> : "Ô" U00D4
-<Multi_key> <O> <greater> : "Ô" U00D4
-<Multi_key> <O> <asciicircum> : "Ô" U00D4
-<Multi_key> <minus> <O> : "Õ" U00D5
-<Multi_key> <O> <minus> : "Õ" U00D5
-<Multi_key> <O> <asciitilde> : "Õ" U00D5
-<Multi_key> <O> <quotedbl> : "Ö" U00D6
-<Multi_key> <O> <diaeresis> : "Ö" U00D6
-<Multi_key> <diaeresis> <O> : "Ö" U00D6
-<Multi_key> <space> <parenleft> : "˘" U02D8
-<Multi_key> <parenleft> <space> : "˘" U02D8
-<Multi_key> <U> <grave> : "Ù" U00D9
-<Multi_key> <U> <apostrophe> : "Ú" U00DA
-<Multi_key> <U> <acute> : "Ú" U00DA
-<Multi_key> <greater> <U> : "Û" U00DB
-<Multi_key> <U> <greater> : "Û" U00DB
-<Multi_key> <U> <asciicircum> : "Û" U00DB
-<Multi_key> <U> <quotedbl> : "Ü" U00DC
-<Multi_key> <U> <diaeresis> : "Ü" U00DC
-<Multi_key> <diaeresis> <U> : "Ü" U00DC
-<Multi_key> <Y> <apostrophe> : "Ý" U00DD
-<Multi_key> <Y> <acute> : "Ý" U00DD
-<Multi_key> <a> <grave> : "à" U00E0
-<Multi_key> <a> <apostrophe> : "á" U00E1
-<Multi_key> <a> <acute> : "á" U00E1
-<Multi_key> <greater> <a> : "â" U00E2
-<Multi_key> <a> <greater> : "â" U00E2
-<Multi_key> <a> <asciicircum> : "â" U00E2
-<Multi_key> <minus> <a> : "ā" U0101
-<Multi_key> <a> <minus> : "ā" U0101
-<Multi_key> <a> <asciitilde> : "ã" U00E3
-<Multi_key> <a> <quotedbl> : "ä" U00E4
-<Multi_key> <a> <diaeresis> : "ä" U00E4
-<Multi_key> <diaeresis> <a> : "ä" U00E4
-<Multi_key> <asterisk> <a> : "å" U00E5
-<Multi_key> <a> <asterisk> : "å" U00E5
-<Multi_key> <a> <a> : "å" U00E5
-<Multi_key> <c> <comma> : "ç" U00E7
-<Multi_key> <e> <grave> : "è" U00E8
-<Multi_key> <e> <apostrophe> : "é" U00E9
-<Multi_key> <e> <acute> : "é" U00E9
-<Multi_key> <greater> <e> : "ê" U00EA
-<Multi_key> <e> <greater> : "ê" U00EA
-<Multi_key> <e> <asciicircum> : "ê" U00EA
-<Multi_key> <e> <quotedbl> : "ë" U00EB
-<Multi_key> <e> <diaeresis> : "ë" U00EB
-<Multi_key> <diaeresis> <e> : "ë" U00EB
-<Multi_key> <i> <grave> : "ì" U00EC
-<Multi_key> <i> <apostrophe> : "í" U00ED
-<Multi_key> <i> <acute> : "í" U00ED
-<Multi_key> <greater> <i> : "î" U00EE
-<Multi_key> <i> <greater> : "î" U00EE
-<Multi_key> <i> <asciicircum> : "î" U00EE
-<Multi_key> <i> <quotedbl> : "ï" U00EF
-<Multi_key> <i> <diaeresis> : "ï" U00EF
-<Multi_key> <diaeresis> <i> : "ï" U00EF
-<Multi_key> <minus> <n> : "ñ" U00F1
-<Multi_key> <n> <minus> : "ñ" U00F1
-<Multi_key> <n> <asciitilde> : "ñ" U00F1
-<Multi_key> <o> <grave> : "ò" U00F2
-<Multi_key> <o> <apostrophe> : "ó" U00F3
-<Multi_key> <o> <acute> : "ó" U00F3
-<Multi_key> <greater> <o> : "ô" U00F4
-<Multi_key> <o> <greater> : "ô" U00F4
-<Multi_key> <o> <asciicircum> : "ô" U00F4
-<Multi_key> <minus> <o> : "ō" U014D
-<Multi_key> <o> <minus> : "ō" U014D
-<Multi_key> <o> <asciitilde> : "õ" U00F5
-<Multi_key> <o> <quotedbl> : "ö" U00F6
-<Multi_key> <o> <diaeresis> : "ö" U00F6
-<Multi_key> <diaeresis> <o> : "ö" U00F6
-<Multi_key> <o> <slash> : "ø" U00F8
-<Multi_key> <u> <grave> : "ù" U00F9
-<Multi_key> <u> <apostrophe> : "ú" U00FA
-<Multi_key> <u> <acute> : "ú" U00FA
-<Multi_key> <greater> <u> : "û" U00FB
-<Multi_key> <u> <greater> : "û" U00FB
-<Multi_key> <u> <asciicircum> : "û" U00FB
-<Multi_key> <u> <quotedbl> : "ü" U00FC
-<Multi_key> <u> <diaeresis> : "ü" U00FC
-<Multi_key> <diaeresis> <u> : "ü" U00FC
-<Multi_key> <y> <apostrophe> : "ý" U00FD
-<Multi_key> <y> <acute> : "ý" U00FD
-<Multi_key> <y> <quotedbl> : "ÿ" U00FF
-<Multi_key> <y> <diaeresis> : "ÿ" U00FF
-<Multi_key> <diaeresis> <y> : "ÿ" U00FF
-<Multi_key> <parenleft> <A> : "Ă" U0102
-<Multi_key> <A> <parenleft> : "Ă" U0102
-<Multi_key> <parenleft> <a> : "ă" U0103
-<Multi_key> <a> <parenleft> : "ă" U0103
-<Multi_key> <comma> <A> : "Ą" U0104
-<Multi_key> <A> <comma> : "Ą" U0104
-<Multi_key> <comma> <a> : "ą" U0105
-<Multi_key> <a> <comma> : "ą" U0105
-<Multi_key> <C> <apostrophe> : "Ć" U0106
-<Multi_key> <c> <apostrophe> : "ć" U0107
-<Multi_key> <C> <period> : "Ċ" U010A
-<Multi_key> <c> <period> : "ċ" U010B
-<Multi_key> <less> <C> : "Č" U010C
-<Multi_key> <C> <less> : "Č" U010C
-<Multi_key> <less> <c> : "č" U010D
-<Multi_key> <c> <less> : "č" U010D
-<Multi_key> <less> <D> : "Ď" U010E
-<Multi_key> <D> <less> : "Ď" U010E
-<Multi_key> <less> <d> : "ď" U010F
-<Multi_key> <d> <less> : "ď" U010F
-<Multi_key> <minus> <D> : "Đ" U0110
-<Multi_key> <D> <minus> : "Đ" U0110
-<Multi_key> <minus> <d> : "đ" U0111
-<Multi_key> <minus> <E> : "Ē" U0112
-<Multi_key> <E> <minus> : "Ē" U0112
-<Multi_key> <E> <underscore> : "Ē" U0112
-<Multi_key> <minus> <e> : "ē" U0113
-<Multi_key> <e> <minus> : "ē" U0113
-<Multi_key> <e> <underscore> : "ē" U0113
-<Multi_key> <E> <period> : "Ė" U0116
-<Multi_key> <E> <comma> : "Ę" U0118
-<Multi_key> <e> <comma> : "ę" U0119
-<Multi_key> <less> <E> : "Ě" U011A
-<Multi_key> <E> <less> : "Ě" U011A
-<Multi_key> <less> <e> : "ě" U011B
-<Multi_key> <e> <less> : "ě" U011B
-<Multi_key> <parenleft> <G> : "Ğ" U011E
-<Multi_key> <G> <parenleft> : "Ğ" U011E
-<Multi_key> <G> <U> : "Ğ" U011E
-<Multi_key> <G> <breve> : "Ğ" U011E
-<Multi_key> <breve> <G> : "Ğ" U011E
-<Multi_key> <parenleft> <g> : "ğ" U011F
-<Multi_key> <g> <parenleft> : "ğ" U011F
-<Multi_key> <g> <U> : "ğ" U011F
-<Multi_key> <g> <breve> : "ğ" U011F
-<Multi_key> <breve> <g> : "ğ" U011F
-<Multi_key> <G> <period> : "Ġ" U0120
-<Multi_key> <g> <period> : "ġ" U0121
-<Multi_key> <G> <comma> : "Ģ" U0122
-<Multi_key> <g> <comma> : "ģ" U0123
-<Multi_key> <I> <asciitilde> : "Ĩ" U0128
-<Multi_key> <i> <asciitilde> : "ĩ" U0129
-<Multi_key> <minus> <I> : "Ī" U012A
-<Multi_key> <I> <minus> : "Ī" U012A
-<Multi_key> <I> <underscore> : "Ī" U012A
-<Multi_key> <minus> <i> : "ī" U012B
-<Multi_key> <i> <minus> : "ī" U012B
-<Multi_key> <i> <underscore> : "ī" U012B
-<Multi_key> <comma> <I> : "Į" U012E
-<Multi_key> <I> <comma> : "Į" U012E
-<Multi_key> <I> <period> : "İ" U0130
-<Multi_key> <period> <i> : "ı" U0131
-<Multi_key> <K> <comma> : "Ķ" U0136
-<Multi_key> <k> <comma> : "ķ" U0137
-<Multi_key> <L> <apostrophe> : "Ĺ" U0139
-<Multi_key> <l> <apostrophe> : "ĺ" U013A
-<Multi_key> <L> <comma> : "Ļ" U013B
-<Multi_key> <l> <comma> : "ļ" U013C
-<Multi_key> <less> <L> : "Ľ" U013D
-<Multi_key> <L> <less> : "Ľ" U013D
-<Multi_key> <less> <l> : "ľ" U013E
-<Multi_key> <l> <less> : "ľ" U013E
-<Multi_key> <L> <slash> : "Ł" U0141
-<Multi_key> <l> <slash> : "ł" U0142
-<Multi_key> <N> <apostrophe> : "Ń" U0143
-<Multi_key> <n> <apostrophe> : "ń" U0144
-<Multi_key> <N> <comma> : "Ņ" U0145
-<Multi_key> <n> <comma> : "ņ" U0146
-<Multi_key> <less> <N> : "Ň" U0147
-<Multi_key> <N> <less> : "Ň" U0147
-<Multi_key> <less> <n> : "ň" U0148
-<Multi_key> <n> <less> : "ň" U0148
-<Multi_key> <R> <apostrophe> : "Ŕ" U0154
-<Multi_key> <r> <apostrophe> : "ŕ" U0155
-<Multi_key> <R> <comma> : "Ŗ" U0156
-<Multi_key> <r> <comma> : "ŗ" U0157
-<Multi_key> <less> <R> : "Ř" U0158
-<Multi_key> <R> <less> : "Ř" U0158
-<Multi_key> <less> <r> : "ř" U0159
-<Multi_key> <r> <less> : "ř" U0159
-<Multi_key> <S> <apostrophe> : "Ś" U015A
-<Multi_key> <s> <apostrophe> : "ś" U015B
-<Multi_key> <O> <slash> : "Ø" U00D8
-<Multi_key> <S> <comma> : "Ş" U015E
-<Multi_key> <S> <cedilla> : "Ş" U015E
-<Multi_key> <s> <comma> : "ş" U015F
-<Multi_key> <s> <cedilla> : "ş" U015F
-<Multi_key> <less> <S> : "Š" U0160
-<Multi_key> <S> <less> : "Š" U0160
-<Multi_key> <less> <s> : "š" U0161
-<Multi_key> <s> <less> : "š" U0161
-<Multi_key> <less> <T> : "Ť" U0164
-<Multi_key> <T> <less> : "Ť" U0164
-<Multi_key> <less> <t> : "ť" U0165
-<Multi_key> <t> <less> : "ť" U0165
-<Multi_key> <T> <minus> : "Ŧ" U0166
-<Multi_key> <T> <slash> : "Ŧ" U0166
-<Multi_key> <t> <minus> : "ŧ" U0167
-<Multi_key> <t> <slash> : "ŧ" U0167
-<Multi_key> <U> <asciitilde> : "Ũ" U0168
-<Multi_key> <u> <asciitilde> : "ũ" U0169
-<Multi_key> <minus> <U> : "Ū" U016A
-<Multi_key> <U> <minus> : "Ū" U016A
-<Multi_key> <U> <underscore> : "Ū" U016A
-<Multi_key> <minus> <u> : "ū" U016B
-<Multi_key> <u> <minus> : "ū" U016B
-<Multi_key> <u> <underscore> : "ū" U016B
-<Multi_key> <asterisk> <U> : "Ů" U016E
-<Multi_key> <U> <asterisk> : "Ů" U016E
-<Multi_key> <asterisk> <u> : "ů" U016F
-<Multi_key> <u> <asterisk> : "ů" U016F
-<Multi_key> <comma> <U> : "Ų" U0172
-<Multi_key> <U> <comma> : "Ų" U0172
-<Multi_key> <comma> <u> : "ų" U0173
-<Multi_key> <u> <comma> : "ų" U0173
-<Multi_key> <W> <asciicircum> : "Ŵ" U0174
-<Multi_key> <w> <asciicircum> : "ŵ" U0175
-<Multi_key> <Y> <asciicircum> : "Ŷ" U0176
-<Multi_key> <y> <asciicircum> : "ŷ" U0177
-<Multi_key> <Y> <quotedbl> : "Ÿ" U0178
-<Multi_key> <Y> <diaeresis> : "Ÿ" U0178
-<Multi_key> <diaeresis> <Y> : "Ÿ" U0178
-<Multi_key> <Z> <apostrophe> : "Ź" U0179
-<Multi_key> <z> <apostrophe> : "ź" U017A
-<Multi_key> <Z> <period> : "Ż" U017B
-<Multi_key> <z> <period> : "ż" U017C
-<Multi_key> <less> <Z> : "Ž" U017D
-<Multi_key> <Z> <less> : "Ž" U017D
-<Multi_key> <v> <Z> : "Ž" U017D
-<Multi_key> <less> <z> : "ž" U017E
-<Multi_key> <v> <z> : "ž" U017E
-<Multi_key> <z> <less> : "ž" U017E
-<dead_acute> <dead_diaeresis> <space> : "΅" U0385
-<dead_diaeresis> <dead_acute> <space> : "΅" U0385
-<Multi_key> <quotedbl> <apostrophe> <space> : "΅" U0385
-<Multi_key> <apostrophe> <quotedbl> <space> : "΅" U0385
-<Multi_key> <Greek_ALPHA> <apostrophe> : "Ά" U0386
-<Multi_key> <m> <period> : "ṁ" U1E41
-<Multi_key> <Greek_EPSILON> <apostrophe> : "Έ" U0388
-<Multi_key> <Greek_ETA> <apostrophe> : "Ή" U0389
-<Multi_key> <Greek_IOTA> <apostrophe> : "Ί" U038A
-<Multi_key> <Greek_OMICRON> <apostrophe> : "Ό" U038C
-<Multi_key> <Greek_UPSILON> <apostrophe> : "Ύ" U038E
-<Multi_key> <Greek_OMEGA> <apostrophe> : "Ώ" U038F
-<dead_diaeresis> <dead_acute> <Greek_iota> : "ΐ" U0390
-<Multi_key> <quotedbl> <apostrophe> <Greek_iota> : "ΐ" U0390
-<Multi_key> <comma> <i> : "į" U012F
-<Multi_key> <i> <comma> : "į" U012F
-<Multi_key> <Greek_IOTA> <quotedbl> : "Ϊ" U03AA
-<Multi_key> <Greek_UPSILON> <quotedbl> : "Ϋ" U03AB
-<Multi_key> <Greek_alpha> <apostrophe> : "ά" U03AC
-<Multi_key> <Greek_epsilon> <apostrophe> : "έ" U03AD
-<Multi_key> <Greek_eta> <apostrophe> : "ή" U03AE
-<Multi_key> <Greek_iota> <apostrophe> : "ί" U03AF
-<dead_diaeresis> <dead_acute> <Greek_upsilon> : "ΰ" U03B0
-<Multi_key> <quotedbl> <apostrophe> <Greek_upsilon> : "ΰ" U03B0
-<Multi_key> <Greek_iota> <quotedbl> : "ϊ" U03CA
-<Multi_key> <Greek_upsilon> <quotedbl> : "ϋ" U03CB
-<Multi_key> <Greek_omicron> <apostrophe> : "ό" U03CC
-<Multi_key> <Greek_upsilon> <apostrophe> : "ύ" U03CD
-<Multi_key> <Greek_omega> <apostrophe> : "ώ" U03CE